1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2020 Facebook */
3 #include "bpf_iter.h"
4 #include "bpf_tracing_net.h"
5 #include <bpf/bpf_helpers.h>
6 #include <bpf/bpf_endian.h>
7 
8 char _license[] SEC("license") = "GPL";
9 
hlist_unhashed_lockless(const struct hlist_node * h)10 static int hlist_unhashed_lockless(const struct hlist_node *h)
11 {
12         return !(h->pprev);
13 }
14 
timer_pending(const struct timer_list * timer)15 static int timer_pending(const struct timer_list * timer)
16 {
17 	return !hlist_unhashed_lockless(&timer->entry);
18 }
19 
20 extern unsigned CONFIG_HZ __kconfig;
21 
22 #define USER_HZ		100
23 #define NSEC_PER_SEC	1000000000ULL
jiffies_to_clock_t(unsigned long x)24 static clock_t jiffies_to_clock_t(unsigned long x)
25 {
26 	/* The implementation here tailored to a particular
27 	 * setting of USER_HZ.
28 	 */
29 	u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
30 	u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
31 
32 	if ((tick_nsec % user_hz_nsec) == 0) {
33 		if (CONFIG_HZ < USER_HZ)
34 			return x * (USER_HZ / CONFIG_HZ);
35 		else
36 			return x / (CONFIG_HZ / USER_HZ);
37 	}
38 	return x * tick_nsec/user_hz_nsec;
39 }
40 
jiffies_delta_to_clock_t(long delta)41 static clock_t jiffies_delta_to_clock_t(long delta)
42 {
43 	if (delta <= 0)
44 		return 0;
45 
46 	return jiffies_to_clock_t(delta);
47 }
48 
sock_i_ino(const struct sock * sk)49 static long sock_i_ino(const struct sock *sk)
50 {
51 	const struct socket *sk_socket = sk->sk_socket;
52 	const struct inode *inode;
53 	unsigned long ino;
54 
55 	if (!sk_socket)
56 		return 0;
57 
58 	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
59 	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
60 	return ino;
61 }
62 
63 static bool
inet_csk_in_pingpong_mode(const struct inet_connection_sock * icsk)64 inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
65 {
66 	return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
67 }
68 
tcp_in_initial_slowstart(const struct tcp_sock * tcp)69 static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
70 {
71 	return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
72 }
73 
/* Print one row describing the full TCP socket @tp to @seq.
 *
 * The row is written by three consecutive BPF_SEQ_PRINTF() calls:
 *   1. @seq_num followed by the local and remote address:port pairs;
 *   2. state, tx/rx queue sizes, timer kind and remaining time, retransmit
 *      count, @uid, outstanding probes, inode number and refcount;
 *   3. the socket pointer, RTO, ATO, ack flags, congestion window and
 *      either the fastopen queue limit (listening sockets) or the
 *      slow-start threshold (-1 while tcp_in_initial_slowstart() holds).
 *
 * Timer kind is 1 for a pending retransmit/REO-timeout/loss-probe timer,
 * 4 for a pending ICSK_TIME_PROBE0 timer, 2 when sk_timer is pending and
 * 0 when none of these applies.
 *
 * Always returns 0.
 */
static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
			 uid_t uid, __u32 seq_num)
{
	const struct inet_connection_sock *icsk = &tp->inet_conn;
	const struct inet_sock *inet = &icsk->icsk_inet;
	const struct sock *sp = &inet->sk;
	const struct fastopen_queue *fastopenq =
		&icsk->icsk_accept_queue.fastopenq;
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = bpf_ntohs(inet->inet_dport);
	__u16 srcp = bpf_ntohs(inet->inet_sport);
	unsigned long tm_expiry;
	clock_t tm_left, rto, ato;
	int tm_kind;
	long ino;
	int state;
	int rxq;

	/* Pick the timer to report and when it expires. */
	switch (icsk->icsk_pending) {
	case ICSK_TIME_RETRANS:
	case ICSK_TIME_REO_TIMEOUT:
	case ICSK_TIME_LOSS_PROBE:
		tm_kind = 1;
		tm_expiry = icsk->icsk_timeout;
		break;
	case ICSK_TIME_PROBE0:
		tm_kind = 4;
		tm_expiry = icsk->icsk_timeout;
		break;
	default:
		if (timer_pending(&sp->sk_timer)) {
			tm_kind = 2;
			tm_expiry = sp->sk_timer.expires;
		} else {
			/* No timer: "expires now", so the delta prints as 0. */
			tm_kind = 0;
			tm_expiry = bpf_jiffies64();
		}
		break;
	}

	/* Listening sockets report the accept backlog as their rx queue;
	 * all others report the unread byte count, clamped at zero.
	 */
	state = sp->sk_state;
	if (state != TCP_LISTEN) {
		rxq = tp->rcv_nxt - tp->copied_seq;
		if (rxq < 0)
			rxq = 0;
	} else {
		rxq = sp->sk_ack_backlog;
	}

	BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
		       seq_num, src, srcp, dest, destp);

	tm_left = jiffies_delta_to_clock_t(tm_expiry - bpf_jiffies64());
	ino = sock_i_ino(sp);
	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
		       state, tp->write_seq - tp->snd_una, rxq,
		       tm_kind, tm_left,
		       icsk->icsk_retransmits, uid,
		       icsk->icsk_probes_out, ino,
		       sp->sk_refcnt.refs.counter);

	rto = jiffies_to_clock_t(icsk->icsk_rto);
	ato = jiffies_to_clock_t(icsk->icsk_ack.ato);
	BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
		       tp, rto, ato,
		       (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
		       tp->snd_cwnd,
		       state == TCP_LISTEN ? fastopenq->max_qlen
				: (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		      );

	return 0;
}
146 
/* Print one row describing the time-wait socket @ttw to @seq.
 *
 * The first BPF_SEQ_PRINTF() emits @seq_num and the local/remote
 * address:port pairs. The second emits the time-wait substate, timer kind
 * 3 with the time left on tw_timer, the refcount and the socket pointer;
 * every other column is printed as a literal 0. @uid is accepted for
 * signature symmetry with the other dump helpers but is not printed.
 *
 * Always returns 0.
 */
static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
			uid_t uid, __u32 seq_num)
{
	struct inet_timewait_sock *tw = &ttw->tw_sk;
	long remaining = tw->tw_timer.expires - bpf_jiffies64();
	__be32 dest = tw->tw_daddr;
	__be32 src = tw->tw_rcv_saddr;
	__u16 destp = bpf_ntohs(tw->tw_dport);
	__u16 srcp = bpf_ntohs(tw->tw_sport);

	BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
		       seq_num, src, srcp, dest, destp);

	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		       tw->tw_substate,
		       0, 0,
		       3, jiffies_delta_to_clock_t(remaining),
		       0, 0, 0, 0,
		       tw->tw_refcnt.refs.counter,
		       tw);

	return 0;
}
171 
/* Print one row describing the request socket @treq to @seq.
 *
 * The first BPF_SEQ_PRINTF() emits @seq_num and the local/remote
 * address:port pairs taken from the inet_request_sock. The second emits
 * TCP_SYN_RECV as the state, timer kind 1 with the time left on rsk_timer
 * (negative differences are clamped to 0), the timeout count, @uid and the
 * request pointer; the remaining columns are printed as a literal 0.
 *
 * Always returns 0.
 */
static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
			 uid_t uid, __u32 seq_num)
{
	struct inet_request_sock *irsk = &treq->req;
	struct request_sock *req = &irsk->req;
	long ttd = req->rsk_timer.expires - bpf_jiffies64();

	/* An already-expired timer is reported as zero time left. */
	ttd = ttd < 0 ? 0 : ttd;

	BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
		       seq_num,
		       irsk->ir_loc_addr, irsk->ir_num,
		       irsk->ir_rmt_addr, bpf_ntohs(irsk->ir_rmt_port));

	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		       TCP_SYN_RECV,
		       0, 0,
		       1, jiffies_to_clock_t(ttd),
		       req->num_timeout, uid,
		       0, 0, 0,
		       req);

	return 0;
}
194 
195 SEC("iter/tcp")
dump_tcp4(struct bpf_iter__tcp * ctx)196 int dump_tcp4(struct bpf_iter__tcp *ctx)
197 {
198 	struct sock_common *sk_common = ctx->sk_common;
199 	struct seq_file *seq = ctx->meta->seq;
200 	struct tcp_timewait_sock *tw;
201 	struct tcp_request_sock *req;
202 	struct tcp_sock *tp;
203 	uid_t uid = ctx->uid;
204 	__u32 seq_num;
205 
206 	if (sk_common == (void *)0)
207 		return 0;
208 
209 	seq_num = ctx->meta->seq_num;
210 	if (seq_num == 0)
211 		BPF_SEQ_PRINTF(seq, "  sl  "
212 				    "local_address "
213 				    "rem_address   "
214 				    "st tx_queue rx_queue tr tm->when retrnsmt"
215 				    "   uid  timeout inode\n");
216 
217 	if (sk_common->skc_family != AF_INET)
218 		return 0;
219 
220 	tp = bpf_skc_to_tcp_sock(sk_common);
221 	if (tp)
222 		return dump_tcp_sock(seq, tp, uid, seq_num);
223 
224 	tw = bpf_skc_to_tcp_timewait_sock(sk_common);
225 	if (tw)
226 		return dump_tw_sock(seq, tw, uid, seq_num);
227 
228 	req = bpf_skc_to_tcp_request_sock(sk_common);
229 	if (req)
230 		return dump_req_sock(seq, req, uid, seq_num);
231 
232 	return 0;
233 }
234