// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
32767c977SYonghong Song #include "bpf_iter.h"
42767c977SYonghong Song #include "bpf_tracing_net.h"
52767c977SYonghong Song #include <bpf/bpf_helpers.h>
62767c977SYonghong Song #include <bpf/bpf_endian.h>
72767c977SYonghong Song 
82767c977SYonghong Song char _license[] SEC("license") = "GPL";
92767c977SYonghong Song 
hlist_unhashed_lockless(const struct hlist_node * h)102767c977SYonghong Song static int hlist_unhashed_lockless(const struct hlist_node *h)
112767c977SYonghong Song {
122767c977SYonghong Song         return !(h->pprev);
132767c977SYonghong Song }
142767c977SYonghong Song 
timer_pending(const struct timer_list * timer)152767c977SYonghong Song static int timer_pending(const struct timer_list * timer)
162767c977SYonghong Song {
172767c977SYonghong Song 	return !hlist_unhashed_lockless(&timer->entry);
182767c977SYonghong Song }
192767c977SYonghong Song 
202767c977SYonghong Song extern unsigned CONFIG_HZ __kconfig;
212767c977SYonghong Song 
222767c977SYonghong Song #define USER_HZ		100
232767c977SYonghong Song #define NSEC_PER_SEC	1000000000ULL
jiffies_to_clock_t(unsigned long x)242767c977SYonghong Song static clock_t jiffies_to_clock_t(unsigned long x)
252767c977SYonghong Song {
262767c977SYonghong Song 	/* The implementation here tailored to a particular
272767c977SYonghong Song 	 * setting of USER_HZ.
282767c977SYonghong Song 	 */
292767c977SYonghong Song 	u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
302767c977SYonghong Song 	u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
312767c977SYonghong Song 
322767c977SYonghong Song 	if ((tick_nsec % user_hz_nsec) == 0) {
332767c977SYonghong Song 		if (CONFIG_HZ < USER_HZ)
342767c977SYonghong Song 			return x * (USER_HZ / CONFIG_HZ);
352767c977SYonghong Song 		else
362767c977SYonghong Song 			return x / (CONFIG_HZ / USER_HZ);
372767c977SYonghong Song 	}
382767c977SYonghong Song 	return x * tick_nsec/user_hz_nsec;
392767c977SYonghong Song }
402767c977SYonghong Song 
jiffies_delta_to_clock_t(long delta)412767c977SYonghong Song static clock_t jiffies_delta_to_clock_t(long delta)
422767c977SYonghong Song {
432767c977SYonghong Song 	if (delta <= 0)
442767c977SYonghong Song 		return 0;
452767c977SYonghong Song 
462767c977SYonghong Song 	return jiffies_to_clock_t(delta);
472767c977SYonghong Song }
482767c977SYonghong Song 
sock_i_ino(const struct sock * sk)492767c977SYonghong Song static long sock_i_ino(const struct sock *sk)
502767c977SYonghong Song {
512767c977SYonghong Song 	const struct socket *sk_socket = sk->sk_socket;
522767c977SYonghong Song 	const struct inode *inode;
532767c977SYonghong Song 	unsigned long ino;
542767c977SYonghong Song 
552767c977SYonghong Song 	if (!sk_socket)
562767c977SYonghong Song 		return 0;
572767c977SYonghong Song 
582767c977SYonghong Song 	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
59e4d9c232SIlya Leoshkevich 	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
602767c977SYonghong Song 	return ino;
612767c977SYonghong Song }
622767c977SYonghong Song 
632767c977SYonghong Song static bool
inet_csk_in_pingpong_mode(const struct inet_connection_sock * icsk)642767c977SYonghong Song inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
652767c977SYonghong Song {
662767c977SYonghong Song 	return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
672767c977SYonghong Song }
682767c977SYonghong Song 
tcp_in_initial_slowstart(const struct tcp_sock * tcp)692767c977SYonghong Song static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
702767c977SYonghong Song {
712767c977SYonghong Song 	return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
722767c977SYonghong Song }
732767c977SYonghong Song 
/* Print one line describing a full TCP/IPv6 socket to @seq, in the column
 * layout announced by the header that dump_tcp6() emits.
 *
 * @seq:     seq_file to print into
 * @tp:      socket being dumped
 * @uid:     uid printed in the uid column
 * @seq_num: value printed in the leading "sl" column
 *
 * The timer column is 1 for a retransmit / reordering-timeout / loss-probe
 * timer, 4 for a zero-window probe timer, 2 when sk_timer is pending and
 * 0 otherwise. Always returns 0.
 */
static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp,
			 uid_t uid, __u32 seq_num)
{
	const struct inet_connection_sock *icsk = &tp->tcp.inet_conn;
	const struct inet_sock *inet = &icsk->icsk_inet;
	const struct sock *sp = &inet->sk;
	const struct fastopen_queue *fastopenq =
		&icsk->icsk_accept_queue.fastopenq;
	const struct in6_addr *dest = &sp->sk_v6_daddr;
	const struct in6_addr *src = &sp->sk_v6_rcv_saddr;
	__u16 destp = bpf_ntohs(inet->inet_dport);
	__u16 srcp = bpf_ntohs(inet->inet_sport);
	unsigned long timer_expires;
	int timer_active;
	long expires_in;
	int rx_queue;
	int state;
	long ino;

	/* Pick the timer code and the jiffies value at which it fires. */
	switch (icsk->icsk_pending) {
	case ICSK_TIME_RETRANS:
	case ICSK_TIME_REO_TIMEOUT:
	case ICSK_TIME_LOSS_PROBE:
		timer_active = 1;
		timer_expires = icsk->icsk_timeout;
		break;
	case ICSK_TIME_PROBE0:
		timer_active = 4;
		timer_expires = icsk->icsk_timeout;
		break;
	default:
		if (timer_pending(&sp->sk_timer)) {
			timer_active = 2;
			timer_expires = sp->sk_timer.expires;
		} else {
			timer_active = 0;
			timer_expires = bpf_jiffies64();
		}
		break;
	}

	/* Listening sockets report the accept backlog in the rx_queue
	 * column; all others report unread bytes, clamped at 0.
	 */
	state = sp->sk_state;
	if (state != TCP_LISTEN) {
		rx_queue = tp->tcp.rcv_nxt - tp->tcp.copied_seq;
		if (rx_queue < 0)
			rx_queue = 0;
	} else {
		rx_queue = sp->sk_ack_backlog;
	}

	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
		       seq_num,
		       src->s6_addr32[0], src->s6_addr32[1],
		       src->s6_addr32[2], src->s6_addr32[3], srcp,
		       dest->s6_addr32[0], dest->s6_addr32[1],
		       dest->s6_addr32[2], dest->s6_addr32[3], destp);

	expires_in = timer_expires - bpf_jiffies64();
	ino = sock_i_ino(sp);
	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
		       state,
		       tp->tcp.write_seq - tp->tcp.snd_una, rx_queue,
		       timer_active,
		       jiffies_delta_to_clock_t(expires_in),
		       icsk->icsk_retransmits, uid,
		       icsk->icsk_probes_out,
		       ino,
		       sp->sk_refcnt.refs.counter);

	/* Last column: fastopen queue limit for listeners, otherwise
	 * snd_ssthresh, with -1 while still in initial slow start.
	 */
	BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
		       tp,
		       jiffies_to_clock_t(icsk->icsk_rto),
		       jiffies_to_clock_t(icsk->icsk_ack.ato),
		       (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
		       tp->tcp.snd_cwnd,
		       state == TCP_LISTEN ? fastopenq->max_qlen
				: (tcp_in_initial_slowstart(&tp->tcp) ? -1
								      : tp->tcp.snd_ssthresh)
		      );

	return 0;
}
1512767c977SYonghong Song 
/* Print one line describing a time-wait TCP/IPv6 socket to @seq.
 *
 * @seq:     seq_file to print into
 * @ttw:     time-wait socket being dumped
 * @uid:     unused here; the uid column is printed as 0
 * @seq_num: value printed in the leading "sl" column
 *
 * The timer code is fixed at 3 and the remaining time comes from
 * tw_timer. Always returns 0.
 */
static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
			uid_t uid, __u32 seq_num)
{
	struct inet_timewait_sock *tw = &ttw->tw_sk;
	long delta = tw->tw_timer.expires - bpf_jiffies64();
	const struct in6_addr *dest = &tw->tw_v6_daddr;
	const struct in6_addr *src = &tw->tw_v6_rcv_saddr;
	__u16 destp = bpf_ntohs(tw->tw_dport);
	__u16 srcp = bpf_ntohs(tw->tw_sport);

	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
		       seq_num,
		       src->s6_addr32[0], src->s6_addr32[1],
		       src->s6_addr32[2], src->s6_addr32[3], srcp,
		       dest->s6_addr32[0], dest->s6_addr32[1],
		       dest->s6_addr32[2], dest->s6_addr32[3], destp);

	/* Queue sizes, retransmits, uid, timeout and inode are all 0 here. */
	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		       tw->tw_substate, 0, 0,
		       3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		       tw->tw_refcnt.refs.counter, tw);

	return 0;
}
1802767c977SYonghong Song 
/* Print one line describing a TCP/IPv6 request socket to @seq.
 *
 * @seq:     seq_file to print into
 * @treq:    request socket being dumped
 * @uid:     uid printed in the uid column
 * @seq_num: value printed in the leading "sl" column
 *
 * The state column is fixed at TCP_SYN_RECV and the timer code at 1; the
 * remaining time comes from rsk_timer, clamped at 0. Always returns 0.
 */
static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
			 uid_t uid, __u32 seq_num)
{
	struct inet_request_sock *irsk = &treq->req;
	struct request_sock *req = &irsk->req;
	struct in6_addr *src = &irsk->ir_v6_loc_addr;
	struct in6_addr *dest = &irsk->ir_v6_rmt_addr;
	long ttd = req->rsk_timer.expires - bpf_jiffies64();
	__u16 rmt_port = bpf_ntohs(irsk->ir_rmt_port);

	/* An already-expired timer is reported as 0 time remaining. */
	if (ttd < 0)
		ttd = 0;

	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
		       seq_num,
		       src->s6_addr32[0], src->s6_addr32[1],
		       src->s6_addr32[2], src->s6_addr32[3],
		       irsk->ir_num,
		       dest->s6_addr32[0], dest->s6_addr32[1],
		       dest->s6_addr32[2], dest->s6_addr32[3],
		       rmt_port);
	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		       TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd),
		       req->num_timeout, uid, 0, 0, 0, req);

	return 0;
}
2102767c977SYonghong Song 
2112767c977SYonghong Song SEC("iter/tcp")
dump_tcp6(struct bpf_iter__tcp * ctx)2122767c977SYonghong Song int dump_tcp6(struct bpf_iter__tcp *ctx)
2132767c977SYonghong Song {
2142767c977SYonghong Song 	struct sock_common *sk_common = ctx->sk_common;
2152767c977SYonghong Song 	struct seq_file *seq = ctx->meta->seq;
2162767c977SYonghong Song 	struct tcp_timewait_sock *tw;
2172767c977SYonghong Song 	struct tcp_request_sock *req;
2182767c977SYonghong Song 	struct tcp6_sock *tp;
2192767c977SYonghong Song 	uid_t uid = ctx->uid;
2202767c977SYonghong Song 	__u32 seq_num;
2212767c977SYonghong Song 
2222767c977SYonghong Song 	if (sk_common == (void *)0)
2232767c977SYonghong Song 		return 0;
2242767c977SYonghong Song 
2252767c977SYonghong Song 	seq_num = ctx->meta->seq_num;
2262767c977SYonghong Song 	if (seq_num == 0)
2272767c977SYonghong Song 		BPF_SEQ_PRINTF(seq, "  sl  "
2282767c977SYonghong Song 				    "local_address                         "
2292767c977SYonghong Song 				    "remote_address                        "
2302767c977SYonghong Song 				    "st tx_queue rx_queue tr tm->when retrnsmt"
2312767c977SYonghong Song 				    "   uid  timeout inode\n");
2322767c977SYonghong Song 
2332767c977SYonghong Song 	if (sk_common->skc_family != AF_INET6)
2342767c977SYonghong Song 		return 0;
2352767c977SYonghong Song 
2362767c977SYonghong Song 	tp = bpf_skc_to_tcp6_sock(sk_common);
2372767c977SYonghong Song 	if (tp)
2382767c977SYonghong Song 		return dump_tcp6_sock(seq, tp, uid, seq_num);
2392767c977SYonghong Song 
2402767c977SYonghong Song 	tw = bpf_skc_to_tcp_timewait_sock(sk_common);
2412767c977SYonghong Song 	if (tw)
2422767c977SYonghong Song 		return dump_tw_sock(seq, tw, uid, seq_num);
2432767c977SYonghong Song 
2442767c977SYonghong Song 	req = bpf_skc_to_tcp_request_sock(sk_common);
2452767c977SYonghong Song 	if (req)
2462767c977SYonghong Song 		return dump_req_sock(seq, req, uid, seq_num);
2472767c977SYonghong Song 
2482767c977SYonghong Song 	return 0;
2492767c977SYonghong Song }
250