1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
251c5d0c4SDavid S. Miller #include <linux/rcupdate.h>
351c5d0c4SDavid S. Miller #include <linux/spinlock.h>
451c5d0c4SDavid S. Miller #include <linux/jiffies.h>
5ab92bb2fSDavid S. Miller #include <linux/module.h>
64aabd8efSDavid S. Miller #include <linux/cache.h>
751c5d0c4SDavid S. Miller #include <linux/slab.h>
851c5d0c4SDavid S. Miller #include <linux/init.h>
94aabd8efSDavid S. Miller #include <linux/tcp.h>
105815d5e7SEric Dumazet #include <linux/hash.h>
11d23ff701SJulian Anastasov #include <linux/tcp_metrics.h>
12976a702aSEric Dumazet #include <linux/vmalloc.h>
134aabd8efSDavid S. Miller
144aabd8efSDavid S. Miller #include <net/inet_connection_sock.h>
1551c5d0c4SDavid S. Miller #include <net/net_namespace.h>
16ab92bb2fSDavid S. Miller #include <net/request_sock.h>
1751c5d0c4SDavid S. Miller #include <net/inetpeer.h>
184aabd8efSDavid S. Miller #include <net/sock.h>
1951c5d0c4SDavid S. Miller #include <net/ipv6.h>
204aabd8efSDavid S. Miller #include <net/dst.h>
214aabd8efSDavid S. Miller #include <net/tcp.h>
22d23ff701SJulian Anastasov #include <net/genetlink.h>
234aabd8efSDavid S. Miller
/* Forward declaration: tcpm_new() has to repeat the chain lookup before
 * the definition of __tcp_get_metrics() appears further down.
 */
static struct tcp_metrics_block *
__tcp_get_metrics(const struct inetpeer_addr *saddr,
		  const struct inetpeer_addr *daddr,
		  struct net *net, unsigned int hash);
2777f99ad1SChristoph Paasch
281fe4c481SYuchung Cheng struct tcp_fastopen_metrics {
291fe4c481SYuchung Cheng u16 mss;
302646c831SDaniel Lee u16 syn_loss:10, /* Recurring Fast Open SYN losses */
312646c831SDaniel Lee try_exp:2; /* Request w/ exp. option (once) */
32aab48743SYuchung Cheng unsigned long last_syn_loss; /* Last Fast Open SYN loss */
331fe4c481SYuchung Cheng struct tcp_fastopen_cookie cookie;
341fe4c481SYuchung Cheng };
351fe4c481SYuchung Cheng
/* TCP_METRIC_MAX counts 2 extra fields that exist only for userspace
 * compatibility. The kernel itself stores RTT and RTTVAR in usec
 * resolution only, so its per-entry value array uses this smaller bound.
 */
#define TCP_METRIC_MAX_KERNEL	(TCP_METRIC_MAX - 2)
40740b0f18SEric Dumazet
4151c5d0c4SDavid S. Miller struct tcp_metrics_block {
4251c5d0c4SDavid S. Miller struct tcp_metrics_block __rcu *tcpm_next;
43d5d986ceSEric Dumazet struct net *tcpm_net;
44a5443028SChristoph Paasch struct inetpeer_addr tcpm_saddr;
45324fd55aSChristoph Paasch struct inetpeer_addr tcpm_daddr;
4651c5d0c4SDavid S. Miller unsigned long tcpm_stamp;
4751c5d0c4SDavid S. Miller u32 tcpm_lock;
48740b0f18SEric Dumazet u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
491fe4c481SYuchung Cheng struct tcp_fastopen_metrics tcpm_fastopen;
50d23ff701SJulian Anastasov
51d23ff701SJulian Anastasov struct rcu_head rcu_head;
5251c5d0c4SDavid S. Miller };
5351c5d0c4SDavid S. Miller
tm_net(const struct tcp_metrics_block * tm)54d5d986ceSEric Dumazet static inline struct net *tm_net(const struct tcp_metrics_block *tm)
55849e8a0cSEric W. Biederman {
56d5d986ceSEric Dumazet /* Paired with the WRITE_ONCE() in tcpm_new() */
57d5d986ceSEric Dumazet return READ_ONCE(tm->tcpm_net);
58849e8a0cSEric W. Biederman }
59849e8a0cSEric W. Biederman
tcp_metric_locked(struct tcp_metrics_block * tm,enum tcp_metric_index idx)6051c5d0c4SDavid S. Miller static bool tcp_metric_locked(struct tcp_metrics_block *tm,
6151c5d0c4SDavid S. Miller enum tcp_metric_index idx)
6251c5d0c4SDavid S. Miller {
63285ce119SEric Dumazet /* Paired with WRITE_ONCE() in tcpm_suck_dst() */
64285ce119SEric Dumazet return READ_ONCE(tm->tcpm_lock) & (1 << idx);
6551c5d0c4SDavid S. Miller }
6651c5d0c4SDavid S. Miller
tcp_metric_get(const struct tcp_metrics_block * tm,enum tcp_metric_index idx)678c4d04f6SEric Dumazet static u32 tcp_metric_get(const struct tcp_metrics_block *tm,
6851c5d0c4SDavid S. Miller enum tcp_metric_index idx)
6951c5d0c4SDavid S. Miller {
708c4d04f6SEric Dumazet /* Paired with WRITE_ONCE() in tcp_metric_set() */
718c4d04f6SEric Dumazet return READ_ONCE(tm->tcpm_vals[idx]);
7251c5d0c4SDavid S. Miller }
7351c5d0c4SDavid S. Miller
/* Store @val as the cached value of metric @idx in @tm. */
static void tcp_metric_set(struct tcp_metrics_block *tm,
			   enum tcp_metric_index idx,
			   u32 val)
{
	u32 *slot = &tm->tcpm_vals[idx];

	/* Lockless write; the reader side is tcp_metric_get() */
	WRITE_ONCE(*slot, val);
}
8151c5d0c4SDavid S. Miller
addr_same(const struct inetpeer_addr * a,const struct inetpeer_addr * b)8251c5d0c4SDavid S. Miller static bool addr_same(const struct inetpeer_addr *a,
8351c5d0c4SDavid S. Miller const struct inetpeer_addr *b)
8451c5d0c4SDavid S. Miller {
85e6638094SEric Dumazet return (a->family == b->family) && !inetpeer_addr_cmp(a, b);
8651c5d0c4SDavid S. Miller }
8751c5d0c4SDavid S. Miller
8851c5d0c4SDavid S. Miller struct tcpm_hash_bucket {
8951c5d0c4SDavid S. Miller struct tcp_metrics_block __rcu *chain;
9051c5d0c4SDavid S. Miller };
9151c5d0c4SDavid S. Miller
92098a697bSEric W. Biederman static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly;
93098a697bSEric W. Biederman static unsigned int tcp_metrics_hash_log __read_mostly;
94098a697bSEric W. Biederman
9551c5d0c4SDavid S. Miller static DEFINE_SPINLOCK(tcp_metrics_lock);
96ddf251faSEric Dumazet static DEFINE_SEQLOCK(fastopen_seqlock);
9751c5d0c4SDavid S. Miller
/* (Re)load @tm from the route metrics attached to @dst.
 *
 * Refreshes the timestamp, rebuilds the lock mask from the dst's locked
 * metrics and copies RTT, RTTVAR, SSTHRESH, CWND and REORDERING. The dst
 * RTT values are multiplied by USEC_PER_MSEC before being cached. When
 * @fastopen_clear is true the Fast Open state is wiped as well.
 */
static void tcpm_suck_dst(struct tcp_metrics_block *tm,
			  const struct dst_entry *dst,
			  bool fastopen_clear)
{
	struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen;
	u32 lock_bits = 0;
	u32 ms;

	WRITE_ONCE(tm->tcpm_stamp, jiffies);

	if (dst_metric_locked(dst, RTAX_RTT))
		lock_bits |= 1 << TCP_METRIC_RTT;
	if (dst_metric_locked(dst, RTAX_RTTVAR))
		lock_bits |= 1 << TCP_METRIC_RTTVAR;
	if (dst_metric_locked(dst, RTAX_SSTHRESH))
		lock_bits |= 1 << TCP_METRIC_SSTHRESH;
	if (dst_metric_locked(dst, RTAX_CWND))
		lock_bits |= 1 << TCP_METRIC_CWND;
	if (dst_metric_locked(dst, RTAX_REORDERING))
		lock_bits |= 1 << TCP_METRIC_REORDERING;
	/* Read back with READ_ONCE() in tcp_metric_locked() */
	WRITE_ONCE(tm->tcpm_lock, lock_bits);

	ms = dst_metric_raw(dst, RTAX_RTT);
	tcp_metric_set(tm, TCP_METRIC_RTT, ms * USEC_PER_MSEC);

	ms = dst_metric_raw(dst, RTAX_RTTVAR);
	tcp_metric_set(tm, TCP_METRIC_RTTVAR, ms * USEC_PER_MSEC);

	tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
		       dst_metric_raw(dst, RTAX_SSTHRESH));
	tcp_metric_set(tm, TCP_METRIC_CWND,
		       dst_metric_raw(dst, RTAX_CWND));
	tcp_metric_set(tm, TCP_METRIC_REORDERING,
		       dst_metric_raw(dst, RTAX_REORDERING));

	if (!fastopen_clear)
		return;

	/* Forget everything learned about Fast Open for this entry;
	 * last_syn_loss is left as it is.
	 */
	write_seqlock(&fastopen_seqlock);
	tfom->mss = 0;
	tfom->syn_loss = 0;
	tfom->try_exp = 0;
	tfom->cookie.exp = false;
	tfom->cookie.len = 0;
	write_sequnlock(&fastopen_seqlock);
}
14251c5d0c4SDavid S. Miller
/* Age (60 * 60 seconds, in jiffies) past which tcpm_check_stamp()
 * reloads an entry from its dst.
 */
#define TCP_METRICS_TIMEOUT		(60 * 60 * HZ)
14477f99ad1SChristoph Paasch
tcpm_check_stamp(struct tcp_metrics_block * tm,const struct dst_entry * dst)145949ad62aSEric Dumazet static void tcpm_check_stamp(struct tcp_metrics_block *tm,
146949ad62aSEric Dumazet const struct dst_entry *dst)
14777f99ad1SChristoph Paasch {
148949ad62aSEric Dumazet unsigned long limit;
149949ad62aSEric Dumazet
150949ad62aSEric Dumazet if (!tm)
151949ad62aSEric Dumazet return;
152949ad62aSEric Dumazet limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT;
153949ad62aSEric Dumazet if (unlikely(time_after(jiffies, limit)))
15477f99ad1SChristoph Paasch tcpm_suck_dst(tm, dst, false);
15577f99ad1SChristoph Paasch }
15677f99ad1SChristoph Paasch
/* A lookup that walks more than TCP_METRICS_RECLAIM_DEPTH entries without
 * finding a match reports TCP_METRICS_RECLAIM_PTR instead of NULL, which
 * tells tcpm_new() to recycle an existing entry rather than allocate.
 *
 * The sentinel is fully parenthesized so it behaves as a single operand
 * wherever it is expanded (e.g. after sizeof or next to a postfix operator).
 */
#define TCP_METRICS_RECLAIM_DEPTH	5
#define TCP_METRICS_RECLAIM_PTR		((struct tcp_metrics_block *) 0x1UL)

/* Dereference an __rcu chain pointer from code that holds tcp_metrics_lock. */
#define deref_locked(p)	\
	rcu_dereference_protected(p, lockdep_is_held(&tcp_metrics_lock))
1629f1ab186SEric Dumazet
tcpm_new(struct dst_entry * dst,struct inetpeer_addr * saddr,struct inetpeer_addr * daddr,unsigned int hash)16351c5d0c4SDavid S. Miller static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
164a5443028SChristoph Paasch struct inetpeer_addr *saddr,
165324fd55aSChristoph Paasch struct inetpeer_addr *daddr,
16677f99ad1SChristoph Paasch unsigned int hash)
16751c5d0c4SDavid S. Miller {
16851c5d0c4SDavid S. Miller struct tcp_metrics_block *tm;
16951c5d0c4SDavid S. Miller struct net *net;
17077f99ad1SChristoph Paasch bool reclaim = false;
17151c5d0c4SDavid S. Miller
17251c5d0c4SDavid S. Miller spin_lock_bh(&tcp_metrics_lock);
17351c5d0c4SDavid S. Miller net = dev_net(dst->dev);
17477f99ad1SChristoph Paasch
17577f99ad1SChristoph Paasch /* While waiting for the spin-lock the cache might have been populated
17677f99ad1SChristoph Paasch * with this entry and so we have to check again.
17777f99ad1SChristoph Paasch */
17841804420SDavid S. Miller tm = __tcp_get_metrics(saddr, daddr, net, hash);
17977f99ad1SChristoph Paasch if (tm == TCP_METRICS_RECLAIM_PTR) {
18077f99ad1SChristoph Paasch reclaim = true;
18177f99ad1SChristoph Paasch tm = NULL;
18277f99ad1SChristoph Paasch }
18377f99ad1SChristoph Paasch if (tm) {
18477f99ad1SChristoph Paasch tcpm_check_stamp(tm, dst);
18577f99ad1SChristoph Paasch goto out_unlock;
18677f99ad1SChristoph Paasch }
18777f99ad1SChristoph Paasch
18851c5d0c4SDavid S. Miller if (unlikely(reclaim)) {
18951c5d0c4SDavid S. Miller struct tcp_metrics_block *oldest;
19051c5d0c4SDavid S. Miller
1919f1ab186SEric Dumazet oldest = deref_locked(tcp_metrics_hash[hash].chain);
1929f1ab186SEric Dumazet for (tm = deref_locked(oldest->tcpm_next); tm;
1939f1ab186SEric Dumazet tm = deref_locked(tm->tcpm_next)) {
194949ad62aSEric Dumazet if (time_before(READ_ONCE(tm->tcpm_stamp),
195949ad62aSEric Dumazet READ_ONCE(oldest->tcpm_stamp)))
19651c5d0c4SDavid S. Miller oldest = tm;
19751c5d0c4SDavid S. Miller }
19851c5d0c4SDavid S. Miller tm = oldest;
19951c5d0c4SDavid S. Miller } else {
200ddf251faSEric Dumazet tm = kzalloc(sizeof(*tm), GFP_ATOMIC);
20151c5d0c4SDavid S. Miller if (!tm)
20251c5d0c4SDavid S. Miller goto out_unlock;
20351c5d0c4SDavid S. Miller }
204d5d986ceSEric Dumazet /* Paired with the READ_ONCE() in tm_net() */
205d5d986ceSEric Dumazet WRITE_ONCE(tm->tcpm_net, net);
206d5d986ceSEric Dumazet
207a5443028SChristoph Paasch tm->tcpm_saddr = *saddr;
208324fd55aSChristoph Paasch tm->tcpm_daddr = *daddr;
20951c5d0c4SDavid S. Miller
210ddf251faSEric Dumazet tcpm_suck_dst(tm, dst, reclaim);
21151c5d0c4SDavid S. Miller
21251c5d0c4SDavid S. Miller if (likely(!reclaim)) {
213098a697bSEric W. Biederman tm->tcpm_next = tcp_metrics_hash[hash].chain;
214098a697bSEric W. Biederman rcu_assign_pointer(tcp_metrics_hash[hash].chain, tm);
21551c5d0c4SDavid S. Miller }
21651c5d0c4SDavid S. Miller
21751c5d0c4SDavid S. Miller out_unlock:
21851c5d0c4SDavid S. Miller spin_unlock_bh(&tcp_metrics_lock);
21951c5d0c4SDavid S. Miller return tm;
22051c5d0c4SDavid S. Miller }
22151c5d0c4SDavid S. Miller
tcp_get_encode(struct tcp_metrics_block * tm,int depth)22251c5d0c4SDavid S. Miller static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, int depth)
22351c5d0c4SDavid S. Miller {
22451c5d0c4SDavid S. Miller if (tm)
22551c5d0c4SDavid S. Miller return tm;
22651c5d0c4SDavid S. Miller if (depth > TCP_METRICS_RECLAIM_DEPTH)
22751c5d0c4SDavid S. Miller return TCP_METRICS_RECLAIM_PTR;
22851c5d0c4SDavid S. Miller return NULL;
22951c5d0c4SDavid S. Miller }
23051c5d0c4SDavid S. Miller
__tcp_get_metrics(const struct inetpeer_addr * saddr,const struct inetpeer_addr * daddr,struct net * net,unsigned int hash)231a5443028SChristoph Paasch static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr,
232a5443028SChristoph Paasch const struct inetpeer_addr *daddr,
23351c5d0c4SDavid S. Miller struct net *net, unsigned int hash)
23451c5d0c4SDavid S. Miller {
23551c5d0c4SDavid S. Miller struct tcp_metrics_block *tm;
23651c5d0c4SDavid S. Miller int depth = 0;
23751c5d0c4SDavid S. Miller
238098a697bSEric W. Biederman for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
23951c5d0c4SDavid S. Miller tm = rcu_dereference(tm->tcpm_next)) {
240a5443028SChristoph Paasch if (addr_same(&tm->tcpm_saddr, saddr) &&
241849e8a0cSEric W. Biederman addr_same(&tm->tcpm_daddr, daddr) &&
242849e8a0cSEric W. Biederman net_eq(tm_net(tm), net))
24351c5d0c4SDavid S. Miller break;
24451c5d0c4SDavid S. Miller depth++;
24551c5d0c4SDavid S. Miller }
24651c5d0c4SDavid S. Miller return tcp_get_encode(tm, depth);
24751c5d0c4SDavid S. Miller }
24851c5d0c4SDavid S. Miller
__tcp_get_metrics_req(struct request_sock * req,struct dst_entry * dst)24951c5d0c4SDavid S. Miller static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
25051c5d0c4SDavid S. Miller struct dst_entry *dst)
25151c5d0c4SDavid S. Miller {
25251c5d0c4SDavid S. Miller struct tcp_metrics_block *tm;
253a5443028SChristoph Paasch struct inetpeer_addr saddr, daddr;
25451c5d0c4SDavid S. Miller unsigned int hash;
25551c5d0c4SDavid S. Miller struct net *net;
25651c5d0c4SDavid S. Miller
257a5443028SChristoph Paasch saddr.family = req->rsk_ops->family;
258324fd55aSChristoph Paasch daddr.family = req->rsk_ops->family;
259324fd55aSChristoph Paasch switch (daddr.family) {
26051c5d0c4SDavid S. Miller case AF_INET:
2613abef286SDavid Ahern inetpeer_set_addr_v4(&saddr, inet_rsk(req)->ir_loc_addr);
2623abef286SDavid Ahern inetpeer_set_addr_v4(&daddr, inet_rsk(req)->ir_rmt_addr);
26372afa352SDavid Ahern hash = ipv4_addr_hash(inet_rsk(req)->ir_rmt_addr);
26451c5d0c4SDavid S. Miller break;
265634fb979SEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
26651c5d0c4SDavid S. Miller case AF_INET6:
2673abef286SDavid Ahern inetpeer_set_addr_v6(&saddr, &inet_rsk(req)->ir_v6_loc_addr);
2683abef286SDavid Ahern inetpeer_set_addr_v6(&daddr, &inet_rsk(req)->ir_v6_rmt_addr);
269634fb979SEric Dumazet hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr);
27051c5d0c4SDavid S. Miller break;
271634fb979SEric Dumazet #endif
27251c5d0c4SDavid S. Miller default:
27351c5d0c4SDavid S. Miller return NULL;
27451c5d0c4SDavid S. Miller }
27551c5d0c4SDavid S. Miller
27651c5d0c4SDavid S. Miller net = dev_net(dst->dev);
2773e5da62dSEric W. Biederman hash ^= net_hash_mix(net);
278098a697bSEric W. Biederman hash = hash_32(hash, tcp_metrics_hash_log);
27951c5d0c4SDavid S. Miller
280098a697bSEric W. Biederman for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
28151c5d0c4SDavid S. Miller tm = rcu_dereference(tm->tcpm_next)) {
282a5443028SChristoph Paasch if (addr_same(&tm->tcpm_saddr, &saddr) &&
283849e8a0cSEric W. Biederman addr_same(&tm->tcpm_daddr, &daddr) &&
284849e8a0cSEric W. Biederman net_eq(tm_net(tm), net))
28551c5d0c4SDavid S. Miller break;
28651c5d0c4SDavid S. Miller }
28751c5d0c4SDavid S. Miller tcpm_check_stamp(tm, dst);
28851c5d0c4SDavid S. Miller return tm;
28951c5d0c4SDavid S. Miller }
29051c5d0c4SDavid S. Miller
tcp_get_metrics(struct sock * sk,struct dst_entry * dst,bool create)29151c5d0c4SDavid S. Miller static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
29251c5d0c4SDavid S. Miller struct dst_entry *dst,
29351c5d0c4SDavid S. Miller bool create)
29451c5d0c4SDavid S. Miller {
29551c5d0c4SDavid S. Miller struct tcp_metrics_block *tm;
296a5443028SChristoph Paasch struct inetpeer_addr saddr, daddr;
29751c5d0c4SDavid S. Miller unsigned int hash;
29851c5d0c4SDavid S. Miller struct net *net;
29951c5d0c4SDavid S. Miller
3003ad88cf7SChristoph Paasch if (sk->sk_family == AF_INET) {
3013abef286SDavid Ahern inetpeer_set_addr_v4(&saddr, inet_sk(sk)->inet_saddr);
3023abef286SDavid Ahern inetpeer_set_addr_v4(&daddr, inet_sk(sk)->inet_daddr);
30372afa352SDavid Ahern hash = ipv4_addr_hash(inet_sk(sk)->inet_daddr);
3043ad88cf7SChristoph Paasch }
305c2bb06dbSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
3063ad88cf7SChristoph Paasch else if (sk->sk_family == AF_INET6) {
3073ad88cf7SChristoph Paasch if (ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
3083abef286SDavid Ahern inetpeer_set_addr_v4(&saddr, inet_sk(sk)->inet_saddr);
3093abef286SDavid Ahern inetpeer_set_addr_v4(&daddr, inet_sk(sk)->inet_daddr);
31072afa352SDavid Ahern hash = ipv4_addr_hash(inet_sk(sk)->inet_daddr);
3113ad88cf7SChristoph Paasch } else {
3123abef286SDavid Ahern inetpeer_set_addr_v6(&saddr, &sk->sk_v6_rcv_saddr);
3133abef286SDavid Ahern inetpeer_set_addr_v6(&daddr, &sk->sk_v6_daddr);
314efe4208fSEric Dumazet hash = ipv6_addr_hash(&sk->sk_v6_daddr);
31551c5d0c4SDavid S. Miller }
3163ad88cf7SChristoph Paasch }
3173ad88cf7SChristoph Paasch #endif
3183ad88cf7SChristoph Paasch else
3193ad88cf7SChristoph Paasch return NULL;
32051c5d0c4SDavid S. Miller
32151c5d0c4SDavid S. Miller net = dev_net(dst->dev);
3223e5da62dSEric W. Biederman hash ^= net_hash_mix(net);
323098a697bSEric W. Biederman hash = hash_32(hash, tcp_metrics_hash_log);
32451c5d0c4SDavid S. Miller
325a5443028SChristoph Paasch tm = __tcp_get_metrics(&saddr, &daddr, net, hash);
32677f99ad1SChristoph Paasch if (tm == TCP_METRICS_RECLAIM_PTR)
32751c5d0c4SDavid S. Miller tm = NULL;
32851c5d0c4SDavid S. Miller if (!tm && create)
32941804420SDavid S. Miller tm = tcpm_new(dst, &saddr, &daddr, hash);
33051c5d0c4SDavid S. Miller else
33151c5d0c4SDavid S. Miller tcpm_check_stamp(tm, dst);
33251c5d0c4SDavid S. Miller
33351c5d0c4SDavid S. Miller return tm;
33451c5d0c4SDavid S. Miller }
33551c5d0c4SDavid S. Miller
3364aabd8efSDavid S. Miller /* Save metrics learned by this TCP session. This function is called
3374aabd8efSDavid S. Miller * only, when TCP finishes successfully i.e. when it enters TIME-WAIT
3384aabd8efSDavid S. Miller * or goes from LAST-ACK to CLOSE.
3394aabd8efSDavid S. Miller */
tcp_update_metrics(struct sock * sk)3404aabd8efSDavid S. Miller void tcp_update_metrics(struct sock *sk)
3414aabd8efSDavid S. Miller {
34251c5d0c4SDavid S. Miller const struct inet_connection_sock *icsk = inet_csk(sk);
3434aabd8efSDavid S. Miller struct dst_entry *dst = __sk_dst_get(sk);
34451c5d0c4SDavid S. Miller struct tcp_sock *tp = tcp_sk(sk);
3451043e25fSNikolay Borisov struct net *net = sock_net(sk);
34651c5d0c4SDavid S. Miller struct tcp_metrics_block *tm;
34751c5d0c4SDavid S. Miller unsigned long rtt;
34851c5d0c4SDavid S. Miller u32 val;
34951c5d0c4SDavid S. Miller int m;
3504aabd8efSDavid S. Miller
351c3a2e837SJulian Anastasov sk_dst_confirm(sk);
3528499a245SKuniyuki Iwashima if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
3534aabd8efSDavid S. Miller return;
3544aabd8efSDavid S. Miller
35551c5d0c4SDavid S. Miller rcu_read_lock();
356740b0f18SEric Dumazet if (icsk->icsk_backoff || !tp->srtt_us) {
3574aabd8efSDavid S. Miller /* This session failed to estimate rtt. Why?
35851c5d0c4SDavid S. Miller * Probably, no packets returned in time. Reset our
35951c5d0c4SDavid S. Miller * results.
3604aabd8efSDavid S. Miller */
36151c5d0c4SDavid S. Miller tm = tcp_get_metrics(sk, dst, false);
36251c5d0c4SDavid S. Miller if (tm && !tcp_metric_locked(tm, TCP_METRIC_RTT))
36351c5d0c4SDavid S. Miller tcp_metric_set(tm, TCP_METRIC_RTT, 0);
36451c5d0c4SDavid S. Miller goto out_unlock;
36551c5d0c4SDavid S. Miller } else
36651c5d0c4SDavid S. Miller tm = tcp_get_metrics(sk, dst, true);
3674aabd8efSDavid S. Miller
36851c5d0c4SDavid S. Miller if (!tm)
36951c5d0c4SDavid S. Miller goto out_unlock;
37051c5d0c4SDavid S. Miller
371740b0f18SEric Dumazet rtt = tcp_metric_get(tm, TCP_METRIC_RTT);
372740b0f18SEric Dumazet m = rtt - tp->srtt_us;
3734aabd8efSDavid S. Miller
37451c5d0c4SDavid S. Miller /* If newly calculated rtt larger than stored one, store new
37551c5d0c4SDavid S. Miller * one. Otherwise, use EWMA. Remember, rtt overestimation is
37651c5d0c4SDavid S. Miller * always better than underestimation.
3774aabd8efSDavid S. Miller */
37851c5d0c4SDavid S. Miller if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) {
3794aabd8efSDavid S. Miller if (m <= 0)
380740b0f18SEric Dumazet rtt = tp->srtt_us;
3814aabd8efSDavid S. Miller else
38251c5d0c4SDavid S. Miller rtt -= (m >> 3);
383740b0f18SEric Dumazet tcp_metric_set(tm, TCP_METRIC_RTT, rtt);
3844aabd8efSDavid S. Miller }
3854aabd8efSDavid S. Miller
38651c5d0c4SDavid S. Miller if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) {
3874aabd8efSDavid S. Miller unsigned long var;
38851c5d0c4SDavid S. Miller
3894aabd8efSDavid S. Miller if (m < 0)
3904aabd8efSDavid S. Miller m = -m;
3914aabd8efSDavid S. Miller
3924aabd8efSDavid S. Miller /* Scale deviation to rttvar fixed point */
3934aabd8efSDavid S. Miller m >>= 1;
394740b0f18SEric Dumazet if (m < tp->mdev_us)
395740b0f18SEric Dumazet m = tp->mdev_us;
3964aabd8efSDavid S. Miller
397740b0f18SEric Dumazet var = tcp_metric_get(tm, TCP_METRIC_RTTVAR);
3984aabd8efSDavid S. Miller if (m >= var)
3994aabd8efSDavid S. Miller var = m;
4004aabd8efSDavid S. Miller else
4014aabd8efSDavid S. Miller var -= (var - m) >> 2;
4024aabd8efSDavid S. Miller
403740b0f18SEric Dumazet tcp_metric_set(tm, TCP_METRIC_RTTVAR, var);
4044aabd8efSDavid S. Miller }
4054aabd8efSDavid S. Miller
4064aabd8efSDavid S. Miller if (tcp_in_initial_slowstart(tp)) {
4074aabd8efSDavid S. Miller /* Slow start still did not finish. */
408ab1ba21bSKuniyuki Iwashima if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
40965e6d901SKevin(Yudong) Yang !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
41051c5d0c4SDavid S. Miller val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
41140570375SEric Dumazet if (val && (tcp_snd_cwnd(tp) >> 1) > val)
41251c5d0c4SDavid S. Miller tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
41340570375SEric Dumazet tcp_snd_cwnd(tp) >> 1);
41451c5d0c4SDavid S. Miller }
41551c5d0c4SDavid S. Miller if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
41651c5d0c4SDavid S. Miller val = tcp_metric_get(tm, TCP_METRIC_CWND);
41740570375SEric Dumazet if (tcp_snd_cwnd(tp) > val)
41851c5d0c4SDavid S. Miller tcp_metric_set(tm, TCP_METRIC_CWND,
41940570375SEric Dumazet tcp_snd_cwnd(tp));
42051c5d0c4SDavid S. Miller }
421071d5080SYuchung Cheng } else if (!tcp_in_slow_start(tp) &&
4224aabd8efSDavid S. Miller icsk->icsk_ca_state == TCP_CA_Open) {
4234aabd8efSDavid S. Miller /* Cong. avoidance phase, cwnd is reliable. */
424ab1ba21bSKuniyuki Iwashima if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
42565e6d901SKevin(Yudong) Yang !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
42651c5d0c4SDavid S. Miller tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
42740570375SEric Dumazet max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh));
42851c5d0c4SDavid S. Miller if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
42951c5d0c4SDavid S. Miller val = tcp_metric_get(tm, TCP_METRIC_CWND);
43040570375SEric Dumazet tcp_metric_set(tm, TCP_METRIC_CWND, (val + tcp_snd_cwnd(tp)) >> 1);
43151c5d0c4SDavid S. Miller }
4324aabd8efSDavid S. Miller } else {
4334aabd8efSDavid S. Miller /* Else slow start did not finish, cwnd is non-sense,
43451c5d0c4SDavid S. Miller * ssthresh may be also invalid.
4354aabd8efSDavid S. Miller */
43651c5d0c4SDavid S. Miller if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
43751c5d0c4SDavid S. Miller val = tcp_metric_get(tm, TCP_METRIC_CWND);
43851c5d0c4SDavid S. Miller tcp_metric_set(tm, TCP_METRIC_CWND,
43951c5d0c4SDavid S. Miller (val + tp->snd_ssthresh) >> 1);
4404aabd8efSDavid S. Miller }
441ab1ba21bSKuniyuki Iwashima if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
44265e6d901SKevin(Yudong) Yang !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
44351c5d0c4SDavid S. Miller val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
44451c5d0c4SDavid S. Miller if (val && tp->snd_ssthresh > val)
44551c5d0c4SDavid S. Miller tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
44651c5d0c4SDavid S. Miller tp->snd_ssthresh);
44751c5d0c4SDavid S. Miller }
44851c5d0c4SDavid S. Miller if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
44951c5d0c4SDavid S. Miller val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
45051c5d0c4SDavid S. Miller if (val < tp->reordering &&
45146778cd1SKuniyuki Iwashima tp->reordering !=
45246778cd1SKuniyuki Iwashima READ_ONCE(net->ipv4.sysctl_tcp_reordering))
45351c5d0c4SDavid S. Miller tcp_metric_set(tm, TCP_METRIC_REORDERING,
45451c5d0c4SDavid S. Miller tp->reordering);
4554aabd8efSDavid S. Miller }
4564aabd8efSDavid S. Miller }
457949ad62aSEric Dumazet WRITE_ONCE(tm->tcpm_stamp, jiffies);
45851c5d0c4SDavid S. Miller out_unlock:
45951c5d0c4SDavid S. Miller rcu_read_unlock();
4604aabd8efSDavid S. Miller }
4614aabd8efSDavid S. Miller
4624aabd8efSDavid S. Miller /* Initialize metrics on socket. */
4634aabd8efSDavid S. Miller
tcp_init_metrics(struct sock * sk)4644aabd8efSDavid S. Miller void tcp_init_metrics(struct sock *sk)
4654aabd8efSDavid S. Miller {
4664aabd8efSDavid S. Miller struct dst_entry *dst = __sk_dst_get(sk);
46751c5d0c4SDavid S. Miller struct tcp_sock *tp = tcp_sk(sk);
46865e6d901SKevin(Yudong) Yang struct net *net = sock_net(sk);
46951c5d0c4SDavid S. Miller struct tcp_metrics_block *tm;
4701b7fdd2aSYuchung Cheng u32 val, crtt = 0; /* cached RTT scaled by 8 */
4714aabd8efSDavid S. Miller
472c3a2e837SJulian Anastasov sk_dst_confirm(sk);
473ce2ef6ecSEric Dumazet /* ssthresh may have been reduced unnecessarily during.
474ce2ef6ecSEric Dumazet * 3WHS. Restore it back to its initial default.
475ce2ef6ecSEric Dumazet */
476ce2ef6ecSEric Dumazet tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
47751456b29SIan Morris if (!dst)
4784aabd8efSDavid S. Miller goto reset;
4794aabd8efSDavid S. Miller
48051c5d0c4SDavid S. Miller rcu_read_lock();
4817faa294fSEric Dumazet tm = tcp_get_metrics(sk, dst, false);
48251c5d0c4SDavid S. Miller if (!tm) {
48351c5d0c4SDavid S. Miller rcu_read_unlock();
48451c5d0c4SDavid S. Miller goto reset;
48551c5d0c4SDavid S. Miller }
48651c5d0c4SDavid S. Miller
48751c5d0c4SDavid S. Miller if (tcp_metric_locked(tm, TCP_METRIC_CWND))
48851c5d0c4SDavid S. Miller tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
48951c5d0c4SDavid S. Miller
490ab1ba21bSKuniyuki Iwashima val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
49165e6d901SKevin(Yudong) Yang 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
49251c5d0c4SDavid S. Miller if (val) {
49351c5d0c4SDavid S. Miller tp->snd_ssthresh = val;
4944aabd8efSDavid S. Miller if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
4954aabd8efSDavid S. Miller tp->snd_ssthresh = tp->snd_cwnd_clamp;
4964aabd8efSDavid S. Miller }
49751c5d0c4SDavid S. Miller val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
498713bafeaSYuchung Cheng if (val && tp->reordering != val)
49951c5d0c4SDavid S. Miller tp->reordering = val;
5004aabd8efSDavid S. Miller
501740b0f18SEric Dumazet crtt = tcp_metric_get(tm, TCP_METRIC_RTT);
50251c5d0c4SDavid S. Miller rcu_read_unlock();
5034aabd8efSDavid S. Miller reset:
50452f20e65SYuchung Cheng /* The initial RTT measurement from the SYN/SYN-ACK is not ideal
50552f20e65SYuchung Cheng * to seed the RTO for later data packets because SYN packets are
50652f20e65SYuchung Cheng * small. Use the per-dst cached values to seed the RTO but keep
50752f20e65SYuchung Cheng * the RTT estimator variables intact (e.g., srtt, mdev, rttvar).
50852f20e65SYuchung Cheng * Later the RTO will be updated immediately upon obtaining the first
50952f20e65SYuchung Cheng * data RTT sample (tcp_rtt_estimator()). Hence the cached RTT only
51052f20e65SYuchung Cheng * influences the first RTO but not later RTT estimation.
51152f20e65SYuchung Cheng *
51252f20e65SYuchung Cheng * But if RTT is not available from the SYN (due to retransmits or
51352f20e65SYuchung Cheng * syn cookies) or the cache, force a conservative 3secs timeout.
51452f20e65SYuchung Cheng *
51552f20e65SYuchung Cheng * A bit of theory. RTT is time passed after "normal" sized packet
51652f20e65SYuchung Cheng * is sent until it is ACKed. In normal circumstances sending small
51752f20e65SYuchung Cheng * packets force peer to delay ACKs and calculation is correct too.
51852f20e65SYuchung Cheng * The algorithm is adaptive and, provided we follow specs, it
51952f20e65SYuchung Cheng * NEVER underestimate RTT. BUT! If peer tries to make some clever
52052f20e65SYuchung Cheng * tricks sort of "quick acks" for time long enough to decrease RTT
52152f20e65SYuchung Cheng * to low value, and then abruptly stops to do it and starts to delay
52252f20e65SYuchung Cheng * ACKs, wait for troubles.
5231b7fdd2aSYuchung Cheng */
524740b0f18SEric Dumazet if (crtt > tp->srtt_us) {
525269aa759SNeal Cardwell /* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
5269bdfb3b7SKonstantin Khlebnikov crtt /= 8 * USEC_PER_SEC / HZ;
527269aa759SNeal Cardwell inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
528740b0f18SEric Dumazet } else if (tp->srtt_us == 0) {
5294aabd8efSDavid S. Miller /* RFC6298: 5.7 We've failed to get a valid RTT sample from
5304aabd8efSDavid S. Miller * 3WHS. This is most likely due to retransmission,
5314aabd8efSDavid S. Miller * including spurious one. Reset the RTO back to 3secs
5324aabd8efSDavid S. Miller * from the more aggressive 1sec to avoid more spurious
5334aabd8efSDavid S. Miller * retransmission.
5344aabd8efSDavid S. Miller */
535740b0f18SEric Dumazet tp->rttvar_us = jiffies_to_usecs(TCP_TIMEOUT_FALLBACK);
536740b0f18SEric Dumazet tp->mdev_us = tp->mdev_max_us = tp->rttvar_us;
537740b0f18SEric Dumazet
5384aabd8efSDavid S. Miller inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
5394aabd8efSDavid S. Miller }
5404aabd8efSDavid S. Miller }
541ab92bb2fSDavid S. Miller
tcp_peer_is_proven(struct request_sock * req,struct dst_entry * dst)542d82bae12SSoheil Hassas Yeganeh bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
543ab92bb2fSDavid S. Miller {
54451c5d0c4SDavid S. Miller struct tcp_metrics_block *tm;
54551c5d0c4SDavid S. Miller bool ret;
54651c5d0c4SDavid S. Miller
547ab92bb2fSDavid S. Miller if (!dst)
548ab92bb2fSDavid S. Miller return false;
54951c5d0c4SDavid S. Miller
55051c5d0c4SDavid S. Miller rcu_read_lock();
55151c5d0c4SDavid S. Miller tm = __tcp_get_metrics_req(req, dst);
552d82bae12SSoheil Hassas Yeganeh if (tm && tcp_metric_get(tm, TCP_METRIC_RTT))
55351c5d0c4SDavid S. Miller ret = true;
55451c5d0c4SDavid S. Miller else
55551c5d0c4SDavid S. Miller ret = false;
55681166dd6SDavid S. Miller rcu_read_unlock();
55781166dd6SDavid S. Miller
55881166dd6SDavid S. Miller return ret;
55981166dd6SDavid S. Miller }
56081166dd6SDavid S. Miller
/* Fetch the cached Fast Open state for the destination of @sk.
 *
 * @mss:    overwritten only when a non-zero MSS is cached
 * @cookie: overwritten with the cached cookie; additionally flagged as
 *          experimental when no cookie is cached (len <= 0) and try_exp
 *          is 1
 *
 * Both outputs are left untouched when the socket has no dst or no
 * metrics entry exists; this function never creates an entry.
 */
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
			    struct tcp_fastopen_cookie *cookie)
{
	struct tcp_metrics_block *tm;
	struct dst_entry *dst;

	rcu_read_lock();
	dst = __sk_dst_get(sk);
	/* tcp_get_metrics() dereferences dst->dev unconditionally, so a
	 * socket without a dst must not reach it (tcp_fastopen_cache_set()
	 * applies the same guard).
	 */
	tm = dst ? tcp_get_metrics(sk, dst, false) : NULL;
	if (tm) {
		struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen;
		unsigned int seq;

		/* Retry until a snapshot is taken with no writer in between */
		do {
			seq = read_seqbegin(&fastopen_seqlock);
			if (tfom->mss)
				*mss = tfom->mss;
			*cookie = tfom->cookie;
			if (cookie->len <= 0 && tfom->try_exp == 1)
				cookie->exp = true;
		} while (read_seqretry(&fastopen_seqlock, seq));
	}
	rcu_read_unlock();
}
5831fe4c481SYuchung Cheng
/* Update the cached Fast Open state for the destination of @sk.
 *
 * @mss:      stored when non-zero.
 * @cookie:   stored when non-NULL and non-empty.
 * @syn_lost: true bumps the SYN loss counter and records the time,
 *            false resets the counter.
 * @try_exp:  stored only when no usable cookie was passed, it is larger
 *            than the cached value, and the cached cookie is empty and
 *            not marked experimental.
 *
 * Does nothing when the socket has no dst or no metrics entry can be
 * obtained.
 */
void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
			    struct tcp_fastopen_cookie *cookie, bool syn_lost,
			    u16 try_exp)
{
	struct dst_entry *dst = __sk_dst_get(sk);
	struct tcp_fastopen_metrics *fo;
	struct tcp_metrics_block *entry;

	if (!dst)
		return;

	rcu_read_lock();
	entry = tcp_get_metrics(sk, dst, true);
	if (!entry)
		goto unlock;

	fo = &entry->tcpm_fastopen;

	write_seqlock_bh(&fastopen_seqlock);
	if (mss)
		fo->mss = mss;

	if (cookie && cookie->len > 0) {
		fo->cookie = *cookie;
	} else if (try_exp > fo->try_exp &&
		   fo->cookie.len <= 0 && !fo->cookie.exp) {
		fo->try_exp = try_exp;
	}

	if (syn_lost) {
		++fo->syn_loss;
		fo->last_syn_loss = jiffies;
	} else {
		fo->syn_loss = 0;
	}
	write_sequnlock_bh(&fastopen_seqlock);

unlock:
	rcu_read_unlock();
}
6151fe4c481SYuchung Cheng
616489111e5SJohannes Berg static struct genl_family tcp_metrics_nl_family;
617d23ff701SJulian Anastasov
6184f70c96fSstephen hemminger static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = {
619d23ff701SJulian Anastasov [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, },
620d23ff701SJulian Anastasov [TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY,
621d23ff701SJulian Anastasov .len = sizeof(struct in6_addr), },
6228c2debddSJakub Kicinski [TCP_METRICS_ATTR_SADDR_IPV4] = { .type = NLA_U32, },
623d23ff701SJulian Anastasov /* Following attributes are not received for GET/DEL,
624d23ff701SJulian Anastasov * we keep them for reference
625d23ff701SJulian Anastasov */
626d23ff701SJulian Anastasov #if 0
627d23ff701SJulian Anastasov [TCP_METRICS_ATTR_AGE] = { .type = NLA_MSECS, },
628d23ff701SJulian Anastasov [TCP_METRICS_ATTR_TW_TSVAL] = { .type = NLA_U32, },
629d23ff701SJulian Anastasov [TCP_METRICS_ATTR_TW_TS_STAMP] = { .type = NLA_S32, },
630d23ff701SJulian Anastasov [TCP_METRICS_ATTR_VALS] = { .type = NLA_NESTED, },
631d23ff701SJulian Anastasov [TCP_METRICS_ATTR_FOPEN_MSS] = { .type = NLA_U16, },
632d23ff701SJulian Anastasov [TCP_METRICS_ATTR_FOPEN_SYN_DROPS] = { .type = NLA_U16, },
633d23ff701SJulian Anastasov [TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS] = { .type = NLA_MSECS, },
634d23ff701SJulian Anastasov [TCP_METRICS_ATTR_FOPEN_COOKIE] = { .type = NLA_BINARY,
635d23ff701SJulian Anastasov .len = TCP_FASTOPEN_COOKIE_MAX, },
636d23ff701SJulian Anastasov #endif
637d23ff701SJulian Anastasov };
638d23ff701SJulian Anastasov
639d23ff701SJulian Anastasov /* Add attributes, caller cancels its header on failure */
tcp_metrics_fill_info(struct sk_buff * msg,struct tcp_metrics_block * tm)640d23ff701SJulian Anastasov static int tcp_metrics_fill_info(struct sk_buff *msg,
641d23ff701SJulian Anastasov struct tcp_metrics_block *tm)
642d23ff701SJulian Anastasov {
643d23ff701SJulian Anastasov struct nlattr *nest;
644d23ff701SJulian Anastasov int i;
645d23ff701SJulian Anastasov
646324fd55aSChristoph Paasch switch (tm->tcpm_daddr.family) {
647d23ff701SJulian Anastasov case AF_INET:
648930345eaSJiri Benc if (nla_put_in_addr(msg, TCP_METRICS_ATTR_ADDR_IPV4,
6493abef286SDavid Ahern inetpeer_get_addr_v4(&tm->tcpm_daddr)) < 0)
650d23ff701SJulian Anastasov goto nla_put_failure;
651930345eaSJiri Benc if (nla_put_in_addr(msg, TCP_METRICS_ATTR_SADDR_IPV4,
6523abef286SDavid Ahern inetpeer_get_addr_v4(&tm->tcpm_saddr)) < 0)
6538a59359cSChristoph Paasch goto nla_put_failure;
654d23ff701SJulian Anastasov break;
655d23ff701SJulian Anastasov case AF_INET6:
656930345eaSJiri Benc if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_ADDR_IPV6,
6573abef286SDavid Ahern inetpeer_get_addr_v6(&tm->tcpm_daddr)) < 0)
658d23ff701SJulian Anastasov goto nla_put_failure;
659930345eaSJiri Benc if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_SADDR_IPV6,
6603abef286SDavid Ahern inetpeer_get_addr_v6(&tm->tcpm_saddr)) < 0)
6618a59359cSChristoph Paasch goto nla_put_failure;
662d23ff701SJulian Anastasov break;
663d23ff701SJulian Anastasov default:
664d23ff701SJulian Anastasov return -EAFNOSUPPORT;
665d23ff701SJulian Anastasov }
666d23ff701SJulian Anastasov
667d23ff701SJulian Anastasov if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
668949ad62aSEric Dumazet jiffies - READ_ONCE(tm->tcpm_stamp),
6692175d87cSNicolas Dichtel TCP_METRICS_ATTR_PAD) < 0)
670d23ff701SJulian Anastasov goto nla_put_failure;
671d23ff701SJulian Anastasov
672d23ff701SJulian Anastasov {
673d23ff701SJulian Anastasov int n = 0;
674d23ff701SJulian Anastasov
675ae0be8deSMichal Kubecek nest = nla_nest_start_noflag(msg, TCP_METRICS_ATTR_VALS);
676d23ff701SJulian Anastasov if (!nest)
677d23ff701SJulian Anastasov goto nla_put_failure;
678740b0f18SEric Dumazet for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
6798c4d04f6SEric Dumazet u32 val = tcp_metric_get(tm, i);
680740b0f18SEric Dumazet
681740b0f18SEric Dumazet if (!val)
682d23ff701SJulian Anastasov continue;
683740b0f18SEric Dumazet if (i == TCP_METRIC_RTT) {
684740b0f18SEric Dumazet if (nla_put_u32(msg, TCP_METRIC_RTT_US + 1,
685740b0f18SEric Dumazet val) < 0)
686740b0f18SEric Dumazet goto nla_put_failure;
687740b0f18SEric Dumazet n++;
688740b0f18SEric Dumazet val = max(val / 1000, 1U);
689740b0f18SEric Dumazet }
690740b0f18SEric Dumazet if (i == TCP_METRIC_RTTVAR) {
691740b0f18SEric Dumazet if (nla_put_u32(msg, TCP_METRIC_RTTVAR_US + 1,
692740b0f18SEric Dumazet val) < 0)
693740b0f18SEric Dumazet goto nla_put_failure;
694740b0f18SEric Dumazet n++;
695740b0f18SEric Dumazet val = max(val / 1000, 1U);
696740b0f18SEric Dumazet }
697740b0f18SEric Dumazet if (nla_put_u32(msg, i + 1, val) < 0)
698d23ff701SJulian Anastasov goto nla_put_failure;
699d23ff701SJulian Anastasov n++;
700d23ff701SJulian Anastasov }
701d23ff701SJulian Anastasov if (n)
702d23ff701SJulian Anastasov nla_nest_end(msg, nest);
703d23ff701SJulian Anastasov else
704d23ff701SJulian Anastasov nla_nest_cancel(msg, nest);
705d23ff701SJulian Anastasov }
706d23ff701SJulian Anastasov
707d23ff701SJulian Anastasov {
708d23ff701SJulian Anastasov struct tcp_fastopen_metrics tfom_copy[1], *tfom;
709d23ff701SJulian Anastasov unsigned int seq;
710d23ff701SJulian Anastasov
711d23ff701SJulian Anastasov do {
712d23ff701SJulian Anastasov seq = read_seqbegin(&fastopen_seqlock);
713d23ff701SJulian Anastasov tfom_copy[0] = tm->tcpm_fastopen;
714d23ff701SJulian Anastasov } while (read_seqretry(&fastopen_seqlock, seq));
715d23ff701SJulian Anastasov
716d23ff701SJulian Anastasov tfom = tfom_copy;
717d23ff701SJulian Anastasov if (tfom->mss &&
718d23ff701SJulian Anastasov nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_MSS,
719d23ff701SJulian Anastasov tfom->mss) < 0)
720d23ff701SJulian Anastasov goto nla_put_failure;
721d23ff701SJulian Anastasov if (tfom->syn_loss &&
722d23ff701SJulian Anastasov (nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROPS,
723d23ff701SJulian Anastasov tfom->syn_loss) < 0 ||
724d23ff701SJulian Anastasov nla_put_msecs(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS,
7252175d87cSNicolas Dichtel jiffies - tfom->last_syn_loss,
7262175d87cSNicolas Dichtel TCP_METRICS_ATTR_PAD) < 0))
727d23ff701SJulian Anastasov goto nla_put_failure;
728d23ff701SJulian Anastasov if (tfom->cookie.len > 0 &&
729d23ff701SJulian Anastasov nla_put(msg, TCP_METRICS_ATTR_FOPEN_COOKIE,
730d23ff701SJulian Anastasov tfom->cookie.len, tfom->cookie.val) < 0)
731d23ff701SJulian Anastasov goto nla_put_failure;
732d23ff701SJulian Anastasov }
733d23ff701SJulian Anastasov
734d23ff701SJulian Anastasov return 0;
735d23ff701SJulian Anastasov
736d23ff701SJulian Anastasov nla_put_failure:
737d23ff701SJulian Anastasov return -EMSGSIZE;
738d23ff701SJulian Anastasov }
739d23ff701SJulian Anastasov
tcp_metrics_dump_info(struct sk_buff * skb,struct netlink_callback * cb,struct tcp_metrics_block * tm)740d23ff701SJulian Anastasov static int tcp_metrics_dump_info(struct sk_buff *skb,
741d23ff701SJulian Anastasov struct netlink_callback *cb,
742d23ff701SJulian Anastasov struct tcp_metrics_block *tm)
743d23ff701SJulian Anastasov {
744d23ff701SJulian Anastasov void *hdr;
745d23ff701SJulian Anastasov
74615e47304SEric W. Biederman hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
747d23ff701SJulian Anastasov &tcp_metrics_nl_family, NLM_F_MULTI,
748d23ff701SJulian Anastasov TCP_METRICS_CMD_GET);
749d23ff701SJulian Anastasov if (!hdr)
750d23ff701SJulian Anastasov return -EMSGSIZE;
751d23ff701SJulian Anastasov
752d23ff701SJulian Anastasov if (tcp_metrics_fill_info(skb, tm) < 0)
753d23ff701SJulian Anastasov goto nla_put_failure;
754d23ff701SJulian Anastasov
755053c095aSJohannes Berg genlmsg_end(skb, hdr);
756053c095aSJohannes Berg return 0;
757d23ff701SJulian Anastasov
758d23ff701SJulian Anastasov nla_put_failure:
759d23ff701SJulian Anastasov genlmsg_cancel(skb, hdr);
760d23ff701SJulian Anastasov return -EMSGSIZE;
761d23ff701SJulian Anastasov }
762d23ff701SJulian Anastasov
tcp_metrics_nl_dump(struct sk_buff * skb,struct netlink_callback * cb)763d23ff701SJulian Anastasov static int tcp_metrics_nl_dump(struct sk_buff *skb,
764d23ff701SJulian Anastasov struct netlink_callback *cb)
765d23ff701SJulian Anastasov {
766d23ff701SJulian Anastasov struct net *net = sock_net(skb->sk);
767098a697bSEric W. Biederman unsigned int max_rows = 1U << tcp_metrics_hash_log;
768d23ff701SJulian Anastasov unsigned int row, s_row = cb->args[0];
769d23ff701SJulian Anastasov int s_col = cb->args[1], col = s_col;
770d23ff701SJulian Anastasov
771d23ff701SJulian Anastasov for (row = s_row; row < max_rows; row++, s_col = 0) {
772d23ff701SJulian Anastasov struct tcp_metrics_block *tm;
773098a697bSEric W. Biederman struct tcpm_hash_bucket *hb = tcp_metrics_hash + row;
774d23ff701SJulian Anastasov
775d23ff701SJulian Anastasov rcu_read_lock();
776d23ff701SJulian Anastasov for (col = 0, tm = rcu_dereference(hb->chain); tm;
777d23ff701SJulian Anastasov tm = rcu_dereference(tm->tcpm_next), col++) {
778849e8a0cSEric W. Biederman if (!net_eq(tm_net(tm), net))
779849e8a0cSEric W. Biederman continue;
780d23ff701SJulian Anastasov if (col < s_col)
781d23ff701SJulian Anastasov continue;
782d23ff701SJulian Anastasov if (tcp_metrics_dump_info(skb, cb, tm) < 0) {
783d23ff701SJulian Anastasov rcu_read_unlock();
784d23ff701SJulian Anastasov goto done;
785d23ff701SJulian Anastasov }
786d23ff701SJulian Anastasov }
787d23ff701SJulian Anastasov rcu_read_unlock();
788d23ff701SJulian Anastasov }
789d23ff701SJulian Anastasov
790d23ff701SJulian Anastasov done:
791d23ff701SJulian Anastasov cb->args[0] = row;
792d23ff701SJulian Anastasov cb->args[1] = col;
793d23ff701SJulian Anastasov return skb->len;
794d23ff701SJulian Anastasov }
795d23ff701SJulian Anastasov
__parse_nl_addr(struct genl_info * info,struct inetpeer_addr * addr,unsigned int * hash,int optional,int v4,int v6)7963e7013ddSChristoph Paasch static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
7973e7013ddSChristoph Paasch unsigned int *hash, int optional, int v4, int v6)
798d23ff701SJulian Anastasov {
799d23ff701SJulian Anastasov struct nlattr *a;
800d23ff701SJulian Anastasov
8013e7013ddSChristoph Paasch a = info->attrs[v4];
802d23ff701SJulian Anastasov if (a) {
8033abef286SDavid Ahern inetpeer_set_addr_v4(addr, nla_get_in_addr(a));
8043e7013ddSChristoph Paasch if (hash)
8053abef286SDavid Ahern *hash = ipv4_addr_hash(inetpeer_get_addr_v4(addr));
806d23ff701SJulian Anastasov return 0;
807d23ff701SJulian Anastasov }
8083e7013ddSChristoph Paasch a = info->attrs[v6];
809d23ff701SJulian Anastasov if (a) {
8103abef286SDavid Ahern struct in6_addr in6;
8113abef286SDavid Ahern
8122c42a3fbSJulian Anastasov if (nla_len(a) != sizeof(struct in6_addr))
813d23ff701SJulian Anastasov return -EINVAL;
8143abef286SDavid Ahern in6 = nla_get_in6_addr(a);
8153abef286SDavid Ahern inetpeer_set_addr_v6(addr, &in6);
8163e7013ddSChristoph Paasch if (hash)
8173abef286SDavid Ahern *hash = ipv6_addr_hash(inetpeer_get_addr_v6(addr));
818d23ff701SJulian Anastasov return 0;
819d23ff701SJulian Anastasov }
820d23ff701SJulian Anastasov return optional ? 1 : -EAFNOSUPPORT;
821d23ff701SJulian Anastasov }
822d23ff701SJulian Anastasov
parse_nl_addr(struct genl_info * info,struct inetpeer_addr * addr,unsigned int * hash,int optional)8233e7013ddSChristoph Paasch static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
8243e7013ddSChristoph Paasch unsigned int *hash, int optional)
8253e7013ddSChristoph Paasch {
8263e7013ddSChristoph Paasch return __parse_nl_addr(info, addr, hash, optional,
8273e7013ddSChristoph Paasch TCP_METRICS_ATTR_ADDR_IPV4,
8283e7013ddSChristoph Paasch TCP_METRICS_ATTR_ADDR_IPV6);
8293e7013ddSChristoph Paasch }
8303e7013ddSChristoph Paasch
parse_nl_saddr(struct genl_info * info,struct inetpeer_addr * addr)8313e7013ddSChristoph Paasch static int parse_nl_saddr(struct genl_info *info, struct inetpeer_addr *addr)
8323e7013ddSChristoph Paasch {
8333e7013ddSChristoph Paasch return __parse_nl_addr(info, addr, NULL, 0,
8343e7013ddSChristoph Paasch TCP_METRICS_ATTR_SADDR_IPV4,
8353e7013ddSChristoph Paasch TCP_METRICS_ATTR_SADDR_IPV6);
8363e7013ddSChristoph Paasch }
8373e7013ddSChristoph Paasch
tcp_metrics_nl_cmd_get(struct sk_buff * skb,struct genl_info * info)838d23ff701SJulian Anastasov static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info)
839d23ff701SJulian Anastasov {
840d23ff701SJulian Anastasov struct tcp_metrics_block *tm;
8413e7013ddSChristoph Paasch struct inetpeer_addr saddr, daddr;
842d23ff701SJulian Anastasov unsigned int hash;
843d23ff701SJulian Anastasov struct sk_buff *msg;
844d23ff701SJulian Anastasov struct net *net = genl_info_net(info);
845d23ff701SJulian Anastasov void *reply;
846d23ff701SJulian Anastasov int ret;
8473e7013ddSChristoph Paasch bool src = true;
848d23ff701SJulian Anastasov
849324fd55aSChristoph Paasch ret = parse_nl_addr(info, &daddr, &hash, 0);
850d23ff701SJulian Anastasov if (ret < 0)
851d23ff701SJulian Anastasov return ret;
852d23ff701SJulian Anastasov
8533e7013ddSChristoph Paasch ret = parse_nl_saddr(info, &saddr);
8543e7013ddSChristoph Paasch if (ret < 0)
8553e7013ddSChristoph Paasch src = false;
8563e7013ddSChristoph Paasch
857d23ff701SJulian Anastasov msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
858d23ff701SJulian Anastasov if (!msg)
859d23ff701SJulian Anastasov return -ENOMEM;
860d23ff701SJulian Anastasov
861d23ff701SJulian Anastasov reply = genlmsg_put_reply(msg, info, &tcp_metrics_nl_family, 0,
862d23ff701SJulian Anastasov info->genlhdr->cmd);
863d23ff701SJulian Anastasov if (!reply)
864d23ff701SJulian Anastasov goto nla_put_failure;
865d23ff701SJulian Anastasov
8663e5da62dSEric W. Biederman hash ^= net_hash_mix(net);
867098a697bSEric W. Biederman hash = hash_32(hash, tcp_metrics_hash_log);
868d23ff701SJulian Anastasov ret = -ESRCH;
869d23ff701SJulian Anastasov rcu_read_lock();
870098a697bSEric W. Biederman for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
871d23ff701SJulian Anastasov tm = rcu_dereference(tm->tcpm_next)) {
8723e7013ddSChristoph Paasch if (addr_same(&tm->tcpm_daddr, &daddr) &&
873849e8a0cSEric W. Biederman (!src || addr_same(&tm->tcpm_saddr, &saddr)) &&
874849e8a0cSEric W. Biederman net_eq(tm_net(tm), net)) {
875d23ff701SJulian Anastasov ret = tcp_metrics_fill_info(msg, tm);
876d23ff701SJulian Anastasov break;
877d23ff701SJulian Anastasov }
878d23ff701SJulian Anastasov }
879d23ff701SJulian Anastasov rcu_read_unlock();
880d23ff701SJulian Anastasov if (ret < 0)
881d23ff701SJulian Anastasov goto out_free;
882d23ff701SJulian Anastasov
883d23ff701SJulian Anastasov genlmsg_end(msg, reply);
884d23ff701SJulian Anastasov return genlmsg_reply(msg, info);
885d23ff701SJulian Anastasov
886d23ff701SJulian Anastasov nla_put_failure:
887d23ff701SJulian Anastasov ret = -EMSGSIZE;
888d23ff701SJulian Anastasov
889d23ff701SJulian Anastasov out_free:
890d23ff701SJulian Anastasov nlmsg_free(msg);
891d23ff701SJulian Anastasov return ret;
892d23ff701SJulian Anastasov }
893d23ff701SJulian Anastasov
tcp_metrics_flush_all(struct net * net)8948a4bff71SEric W. Biederman static void tcp_metrics_flush_all(struct net *net)
895d23ff701SJulian Anastasov {
896098a697bSEric W. Biederman unsigned int max_rows = 1U << tcp_metrics_hash_log;
897098a697bSEric W. Biederman struct tcpm_hash_bucket *hb = tcp_metrics_hash;
898d23ff701SJulian Anastasov struct tcp_metrics_block *tm;
899d23ff701SJulian Anastasov unsigned int row;
900d23ff701SJulian Anastasov
901d23ff701SJulian Anastasov for (row = 0; row < max_rows; row++, hb++) {
902*77100f2eSEric Dumazet struct tcp_metrics_block __rcu **pp = &hb->chain;
903789e6ddbSEric Dumazet bool match;
904789e6ddbSEric Dumazet
905*77100f2eSEric Dumazet if (!rcu_access_pointer(*pp))
906*77100f2eSEric Dumazet continue;
907*77100f2eSEric Dumazet
908d23ff701SJulian Anastasov spin_lock_bh(&tcp_metrics_lock);
9099f1ab186SEric Dumazet for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
910789e6ddbSEric Dumazet match = net ? net_eq(tm_net(tm), net) :
9118b8f3e66SChristian Brauner !refcount_read(&tm_net(tm)->ns.count);
912789e6ddbSEric Dumazet if (match) {
913627bda00SEric Dumazet rcu_assign_pointer(*pp, tm->tcpm_next);
914d23ff701SJulian Anastasov kfree_rcu(tm, rcu_head);
91504f721c6SEric W. Biederman } else {
91604f721c6SEric W. Biederman pp = &tm->tcpm_next;
917d23ff701SJulian Anastasov }
918d23ff701SJulian Anastasov }
91904f721c6SEric W. Biederman spin_unlock_bh(&tcp_metrics_lock);
920*77100f2eSEric Dumazet cond_resched();
92104f721c6SEric W. Biederman }
922d23ff701SJulian Anastasov }
923d23ff701SJulian Anastasov
tcp_metrics_nl_cmd_del(struct sk_buff * skb,struct genl_info * info)924d23ff701SJulian Anastasov static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info)
925d23ff701SJulian Anastasov {
926d23ff701SJulian Anastasov struct tcpm_hash_bucket *hb;
92700ca9c5bSChristoph Paasch struct tcp_metrics_block *tm;
928d23ff701SJulian Anastasov struct tcp_metrics_block __rcu **pp;
9293e7013ddSChristoph Paasch struct inetpeer_addr saddr, daddr;
930d23ff701SJulian Anastasov unsigned int hash;
931d23ff701SJulian Anastasov struct net *net = genl_info_net(info);
932d23ff701SJulian Anastasov int ret;
93300ca9c5bSChristoph Paasch bool src = true, found = false;
934d23ff701SJulian Anastasov
935324fd55aSChristoph Paasch ret = parse_nl_addr(info, &daddr, &hash, 1);
936d23ff701SJulian Anastasov if (ret < 0)
937d23ff701SJulian Anastasov return ret;
9388a4bff71SEric W. Biederman if (ret > 0) {
9398a4bff71SEric W. Biederman tcp_metrics_flush_all(net);
9408a4bff71SEric W. Biederman return 0;
9418a4bff71SEric W. Biederman }
9423e7013ddSChristoph Paasch ret = parse_nl_saddr(info, &saddr);
9433e7013ddSChristoph Paasch if (ret < 0)
9443e7013ddSChristoph Paasch src = false;
945d23ff701SJulian Anastasov
9463e5da62dSEric W. Biederman hash ^= net_hash_mix(net);
947098a697bSEric W. Biederman hash = hash_32(hash, tcp_metrics_hash_log);
948098a697bSEric W. Biederman hb = tcp_metrics_hash + hash;
949d23ff701SJulian Anastasov pp = &hb->chain;
950d23ff701SJulian Anastasov spin_lock_bh(&tcp_metrics_lock);
9519f1ab186SEric Dumazet for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
9523e7013ddSChristoph Paasch if (addr_same(&tm->tcpm_daddr, &daddr) &&
953849e8a0cSEric W. Biederman (!src || addr_same(&tm->tcpm_saddr, &saddr)) &&
954849e8a0cSEric W. Biederman net_eq(tm_net(tm), net)) {
955627bda00SEric Dumazet rcu_assign_pointer(*pp, tm->tcpm_next);
95600ca9c5bSChristoph Paasch kfree_rcu(tm, rcu_head);
95700ca9c5bSChristoph Paasch found = true;
958bbf852b9SChristoph Paasch } else {
959bbf852b9SChristoph Paasch pp = &tm->tcpm_next;
960d23ff701SJulian Anastasov }
961d23ff701SJulian Anastasov }
962d23ff701SJulian Anastasov spin_unlock_bh(&tcp_metrics_lock);
96300ca9c5bSChristoph Paasch if (!found)
964d23ff701SJulian Anastasov return -ESRCH;
965d23ff701SJulian Anastasov return 0;
966d23ff701SJulian Anastasov }
967d23ff701SJulian Anastasov
96866a9b928SJakub Kicinski static const struct genl_small_ops tcp_metrics_nl_ops[] = {
969d23ff701SJulian Anastasov {
970d23ff701SJulian Anastasov .cmd = TCP_METRICS_CMD_GET,
971ef6243acSJohannes Berg .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
972d23ff701SJulian Anastasov .doit = tcp_metrics_nl_cmd_get,
973d23ff701SJulian Anastasov .dumpit = tcp_metrics_nl_dump,
974d23ff701SJulian Anastasov },
975d23ff701SJulian Anastasov {
976d23ff701SJulian Anastasov .cmd = TCP_METRICS_CMD_DEL,
977ef6243acSJohannes Berg .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
978d23ff701SJulian Anastasov .doit = tcp_metrics_nl_cmd_del,
979d23ff701SJulian Anastasov .flags = GENL_ADMIN_PERM,
980d23ff701SJulian Anastasov },
981d23ff701SJulian Anastasov };
982d23ff701SJulian Anastasov
98356989f6dSJohannes Berg static struct genl_family tcp_metrics_nl_family __ro_after_init = {
984489111e5SJohannes Berg .hdrsize = 0,
985489111e5SJohannes Berg .name = TCP_METRICS_GENL_NAME,
986489111e5SJohannes Berg .version = TCP_METRICS_GENL_VERSION,
987489111e5SJohannes Berg .maxattr = TCP_METRICS_ATTR_MAX,
9883b0f31f2SJohannes Berg .policy = tcp_metrics_nl_policy,
989489111e5SJohannes Berg .netnsok = true,
990489111e5SJohannes Berg .module = THIS_MODULE,
99166a9b928SJakub Kicinski .small_ops = tcp_metrics_nl_ops,
99266a9b928SJakub Kicinski .n_small_ops = ARRAY_SIZE(tcp_metrics_nl_ops),
9939c5d03d3SJakub Kicinski .resv_start_op = TCP_METRICS_CMD_DEL + 1,
994489111e5SJohannes Berg };
995489111e5SJohannes Berg
996c4a6b2daSEric Dumazet static unsigned int tcpmhash_entries __initdata;
set_tcpmhash_entries(char * str)99751c5d0c4SDavid S. Miller static int __init set_tcpmhash_entries(char *str)
99851c5d0c4SDavid S. Miller {
99951c5d0c4SDavid S. Miller ssize_t ret;
100051c5d0c4SDavid S. Miller
100151c5d0c4SDavid S. Miller if (!str)
100251c5d0c4SDavid S. Miller return 0;
100351c5d0c4SDavid S. Miller
10045815d5e7SEric Dumazet ret = kstrtouint(str, 0, &tcpmhash_entries);
100551c5d0c4SDavid S. Miller if (ret)
100651c5d0c4SDavid S. Miller return 0;
100751c5d0c4SDavid S. Miller
100851c5d0c4SDavid S. Miller return 1;
100951c5d0c4SDavid S. Miller }
101051c5d0c4SDavid S. Miller __setup("tcpmhash_entries=", set_tcpmhash_entries);
101151c5d0c4SDavid S. Miller
tcp_metrics_hash_alloc(void)1012c4a6b2daSEric Dumazet static void __init tcp_metrics_hash_alloc(void)
101351c5d0c4SDavid S. Miller {
1014c4a6b2daSEric Dumazet unsigned int slots = tcpmhash_entries;
10155815d5e7SEric Dumazet size_t size;
101651c5d0c4SDavid S. Miller
101751c5d0c4SDavid S. Miller if (!slots) {
1018ca79b0c2SArun KS if (totalram_pages() >= 128 * 1024)
101951c5d0c4SDavid S. Miller slots = 16 * 1024;
102051c5d0c4SDavid S. Miller else
102151c5d0c4SDavid S. Miller slots = 8 * 1024;
102251c5d0c4SDavid S. Miller }
102351c5d0c4SDavid S. Miller
1024098a697bSEric W. Biederman tcp_metrics_hash_log = order_base_2(slots);
1025098a697bSEric W. Biederman size = sizeof(struct tcpm_hash_bucket) << tcp_metrics_hash_log;
102651c5d0c4SDavid S. Miller
1027752ade68SMichal Hocko tcp_metrics_hash = kvzalloc(size, GFP_KERNEL);
1028098a697bSEric W. Biederman if (!tcp_metrics_hash)
1029c4a6b2daSEric Dumazet panic("Could not allocate the tcp_metrics hash table\n");
103051c5d0c4SDavid S. Miller }
103151c5d0c4SDavid S. Miller
tcp_net_metrics_exit_batch(struct list_head * net_exit_list)1032789e6ddbSEric Dumazet static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_list)
103351c5d0c4SDavid S. Miller {
1034789e6ddbSEric Dumazet tcp_metrics_flush_all(NULL);
103551c5d0c4SDavid S. Miller }
103651c5d0c4SDavid S. Miller
103751c5d0c4SDavid S. Miller static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
1038789e6ddbSEric Dumazet .exit_batch = tcp_net_metrics_exit_batch,
103951c5d0c4SDavid S. Miller };
104051c5d0c4SDavid S. Miller
tcp_metrics_init(void)104151c5d0c4SDavid S. Miller void __init tcp_metrics_init(void)
104251c5d0c4SDavid S. Miller {
1043d23ff701SJulian Anastasov int ret;
1044d23ff701SJulian Anastasov
1045c4a6b2daSEric Dumazet tcp_metrics_hash_alloc();
1046c4a6b2daSEric Dumazet
1047d23ff701SJulian Anastasov ret = register_pernet_subsys(&tcp_net_metrics_ops);
1048d23ff701SJulian Anastasov if (ret < 0)
1049c4a6b2daSEric Dumazet panic("Could not register tcp_net_metrics_ops\n");
10506493517eSEric W. Biederman
1051489111e5SJohannes Berg ret = genl_register_family(&tcp_metrics_nl_family);
1052d23ff701SJulian Anastasov if (ret < 0)
10536493517eSEric W. Biederman panic("Could not register tcp_metrics generic netlink\n");
105451c5d0c4SDavid S. Miller }
1055