xref: /openbmc/linux/net/ipv4/tcp_diag.c (revision 463c84b9)
1 /*
2  * tcp_diag.c	Module for monitoring TCP sockets.
3  *
4  * Version:	$Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
5  *
6  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 #include <linux/config.h>
15 #include <linux/module.h>
16 #include <linux/types.h>
17 #include <linux/fcntl.h>
18 #include <linux/random.h>
19 #include <linux/cache.h>
20 #include <linux/init.h>
21 #include <linux/time.h>
22 
23 #include <net/icmp.h>
24 #include <net/tcp.h>
25 #include <net/ipv6.h>
26 #include <net/inet_common.h>
27 
28 #include <linux/inet.h>
29 #include <linux/stddef.h>
30 
31 #include <linux/tcp_diag.h>
32 
33 struct tcpdiag_entry
34 {
35 	u32 *saddr;
36 	u32 *daddr;
37 	u16 sport;
38 	u16 dport;
39 	u16 family;
40 	u16 userlocks;
41 };
42 
/* Netlink socket of this module; assigned in tcpdiag_init(). */
static struct sock *tcpnl;

/*
 * Add an attribute of type @attrtype with @attrlen payload bytes to
 * @skb via __RTA_PUT() and evaluate to a pointer to its payload.
 * NOTE(review): users are expected to provide an rtattr_failure label,
 * presumably the target __RTA_PUT() jumps to when the skb has no room.
 */
#define TCPDIAG_PUT(skb, attrtype, attrlen)	\
	RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
47 
48 static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
49 			int ext, u32 pid, u32 seq, u16 nlmsg_flags)
50 {
51 	const struct inet_sock *inet = inet_sk(sk);
52 	struct tcp_sock *tp = tcp_sk(sk);
53 	const struct inet_connection_sock *icsk = inet_csk(sk);
54 	struct tcpdiagmsg *r;
55 	struct nlmsghdr  *nlh;
56 	struct tcp_info  *info = NULL;
57 	struct tcpdiag_meminfo  *minfo = NULL;
58 	unsigned char	 *b = skb->tail;
59 
60 	nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r));
61 	nlh->nlmsg_flags = nlmsg_flags;
62 	r = NLMSG_DATA(nlh);
63 	if (sk->sk_state != TCP_TIME_WAIT) {
64 		if (ext & (1<<(TCPDIAG_MEMINFO-1)))
65 			minfo = TCPDIAG_PUT(skb, TCPDIAG_MEMINFO, sizeof(*minfo));
66 		if (ext & (1<<(TCPDIAG_INFO-1)))
67 			info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info));
68 
69 		if (ext & (1<<(TCPDIAG_CONG-1))) {
70 			size_t len = strlen(tp->ca_ops->name);
71 			strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len+1),
72 			       tp->ca_ops->name);
73 		}
74 	}
75 	r->tcpdiag_family = sk->sk_family;
76 	r->tcpdiag_state = sk->sk_state;
77 	r->tcpdiag_timer = 0;
78 	r->tcpdiag_retrans = 0;
79 
80 	r->id.tcpdiag_if = sk->sk_bound_dev_if;
81 	r->id.tcpdiag_cookie[0] = (u32)(unsigned long)sk;
82 	r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
83 
84 	if (r->tcpdiag_state == TCP_TIME_WAIT) {
85 		const struct inet_timewait_sock *tw = inet_twsk(sk);
86 		long tmo = tw->tw_ttd - jiffies;
87 		if (tmo < 0)
88 			tmo = 0;
89 
90 		r->id.tcpdiag_sport = tw->tw_sport;
91 		r->id.tcpdiag_dport = tw->tw_dport;
92 		r->id.tcpdiag_src[0] = tw->tw_rcv_saddr;
93 		r->id.tcpdiag_dst[0] = tw->tw_daddr;
94 		r->tcpdiag_state = tw->tw_substate;
95 		r->tcpdiag_timer = 3;
96 		r->tcpdiag_expires = (tmo*1000+HZ-1)/HZ;
97 		r->tcpdiag_rqueue = 0;
98 		r->tcpdiag_wqueue = 0;
99 		r->tcpdiag_uid = 0;
100 		r->tcpdiag_inode = 0;
101 #ifdef CONFIG_IP_TCPDIAG_IPV6
102 		if (r->tcpdiag_family == AF_INET6) {
103 			const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
104 
105 			ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
106 				       &tcp6tw->tw_v6_rcv_saddr);
107 			ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
108 				       &tcp6tw->tw_v6_daddr);
109 		}
110 #endif
111 		nlh->nlmsg_len = skb->tail - b;
112 		return skb->len;
113 	}
114 
115 	r->id.tcpdiag_sport = inet->sport;
116 	r->id.tcpdiag_dport = inet->dport;
117 	r->id.tcpdiag_src[0] = inet->rcv_saddr;
118 	r->id.tcpdiag_dst[0] = inet->daddr;
119 
120 #ifdef CONFIG_IP_TCPDIAG_IPV6
121 	if (r->tcpdiag_family == AF_INET6) {
122 		struct ipv6_pinfo *np = inet6_sk(sk);
123 
124 		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
125 			       &np->rcv_saddr);
126 		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
127 			       &np->daddr);
128 	}
129 #endif
130 
131 #define EXPIRES_IN_MS(tmo)  ((tmo-jiffies)*1000+HZ-1)/HZ
132 
133 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
134 		r->tcpdiag_timer = 1;
135 		r->tcpdiag_retrans = icsk->icsk_retransmits;
136 		r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
137 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
138 		r->tcpdiag_timer = 4;
139 		r->tcpdiag_retrans = tp->probes_out;
140 		r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
141 	} else if (timer_pending(&sk->sk_timer)) {
142 		r->tcpdiag_timer = 2;
143 		r->tcpdiag_retrans = tp->probes_out;
144 		r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires);
145 	} else {
146 		r->tcpdiag_timer = 0;
147 		r->tcpdiag_expires = 0;
148 	}
149 #undef EXPIRES_IN_MS
150 
151 	r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq;
152 	r->tcpdiag_wqueue = tp->write_seq - tp->snd_una;
153 	r->tcpdiag_uid = sock_i_uid(sk);
154 	r->tcpdiag_inode = sock_i_ino(sk);
155 
156 	if (minfo) {
157 		minfo->tcpdiag_rmem = atomic_read(&sk->sk_rmem_alloc);
158 		minfo->tcpdiag_wmem = sk->sk_wmem_queued;
159 		minfo->tcpdiag_fmem = sk->sk_forward_alloc;
160 		minfo->tcpdiag_tmem = atomic_read(&sk->sk_wmem_alloc);
161 	}
162 
163 	if (info)
164 		tcp_get_info(sk, info);
165 
166 	if (sk->sk_state < TCP_TIME_WAIT && tp->ca_ops->get_info)
167 		tp->ca_ops->get_info(tp, ext, skb);
168 
169 	nlh->nlmsg_len = skb->tail - b;
170 	return skb->len;
171 
172 rtattr_failure:
173 nlmsg_failure:
174 	skb_trim(skb, b - skb->data);
175 	return -1;
176 }
177 
178 #ifdef CONFIG_IP_TCPDIAG_IPV6
179 extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
180 				  struct in6_addr *daddr, u16 dport,
181 				  int dif);
182 #else
183 static inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
184 					 struct in6_addr *daddr, u16 dport,
185 					 int dif)
186 {
187 	return NULL;
188 }
189 #endif
190 
191 static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh)
192 {
193 	int err;
194 	struct sock *sk;
195 	struct tcpdiagreq *req = NLMSG_DATA(nlh);
196 	struct sk_buff *rep;
197 
198 	if (req->tcpdiag_family == AF_INET) {
199 		sk = inet_lookup(&tcp_hashinfo, req->id.tcpdiag_dst[0],
200 				 req->id.tcpdiag_dport, req->id.tcpdiag_src[0],
201 				 req->id.tcpdiag_sport, req->id.tcpdiag_if);
202 	}
203 #ifdef CONFIG_IP_TCPDIAG_IPV6
204 	else if (req->tcpdiag_family == AF_INET6) {
205 		sk = tcp_v6_lookup((struct in6_addr*)req->id.tcpdiag_dst, req->id.tcpdiag_dport,
206 				   (struct in6_addr*)req->id.tcpdiag_src, req->id.tcpdiag_sport,
207 				   req->id.tcpdiag_if);
208 	}
209 #endif
210 	else {
211 		return -EINVAL;
212 	}
213 
214 	if (sk == NULL)
215 		return -ENOENT;
216 
217 	err = -ESTALE;
218 	if ((req->id.tcpdiag_cookie[0] != TCPDIAG_NOCOOKIE ||
219 	     req->id.tcpdiag_cookie[1] != TCPDIAG_NOCOOKIE) &&
220 	    ((u32)(unsigned long)sk != req->id.tcpdiag_cookie[0] ||
221 	     (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.tcpdiag_cookie[1]))
222 		goto out;
223 
224 	err = -ENOMEM;
225 	rep = alloc_skb(NLMSG_SPACE(sizeof(struct tcpdiagmsg)+
226 				    sizeof(struct tcpdiag_meminfo)+
227 				    sizeof(struct tcp_info)+64), GFP_KERNEL);
228 	if (!rep)
229 		goto out;
230 
231 	if (tcpdiag_fill(rep, sk, req->tcpdiag_ext,
232 			 NETLINK_CB(in_skb).pid,
233 			 nlh->nlmsg_seq, 0) <= 0)
234 		BUG();
235 
236 	err = netlink_unicast(tcpnl, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
237 	if (err > 0)
238 		err = 0;
239 
240 out:
241 	if (sk) {
242 		if (sk->sk_state == TCP_TIME_WAIT)
243 			inet_twsk_put((struct inet_timewait_sock *)sk);
244 		else
245 			sock_put(sk);
246 	}
247 	return err;
248 }
249 
250 static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
251 {
252 	int words = bits >> 5;
253 
254 	bits &= 0x1f;
255 
256 	if (words) {
257 		if (memcmp(a1, a2, words << 2))
258 			return 0;
259 	}
260 	if (bits) {
261 		__u32 w1, w2;
262 		__u32 mask;
263 
264 		w1 = a1[words];
265 		w2 = a2[words];
266 
267 		mask = htonl((0xffffffff) << (32 - bits));
268 
269 		if ((w1 ^ w2) & mask)
270 			return 0;
271 	}
272 
273 	return 1;
274 }
275 
276 
277 static int tcpdiag_bc_run(const void *bc, int len,
278 			  const struct tcpdiag_entry *entry)
279 {
280 	while (len > 0) {
281 		int yes = 1;
282 		const struct tcpdiag_bc_op *op = bc;
283 
284 		switch (op->code) {
285 		case TCPDIAG_BC_NOP:
286 			break;
287 		case TCPDIAG_BC_JMP:
288 			yes = 0;
289 			break;
290 		case TCPDIAG_BC_S_GE:
291 			yes = entry->sport >= op[1].no;
292 			break;
293 		case TCPDIAG_BC_S_LE:
294 			yes = entry->dport <= op[1].no;
295 			break;
296 		case TCPDIAG_BC_D_GE:
297 			yes = entry->dport >= op[1].no;
298 			break;
299 		case TCPDIAG_BC_D_LE:
300 			yes = entry->dport <= op[1].no;
301 			break;
302 		case TCPDIAG_BC_AUTO:
303 			yes = !(entry->userlocks & SOCK_BINDPORT_LOCK);
304 			break;
305 		case TCPDIAG_BC_S_COND:
306 		case TCPDIAG_BC_D_COND:
307 		{
308 			struct tcpdiag_hostcond *cond = (struct tcpdiag_hostcond*)(op+1);
309 			u32 *addr;
310 
311 			if (cond->port != -1 &&
312 			    cond->port != (op->code == TCPDIAG_BC_S_COND ?
313 					     entry->sport : entry->dport)) {
314 				yes = 0;
315 				break;
316 			}
317 
318 			if (cond->prefix_len == 0)
319 				break;
320 
321 			if (op->code == TCPDIAG_BC_S_COND)
322 				addr = entry->saddr;
323 			else
324 				addr = entry->daddr;
325 
326 			if (bitstring_match(addr, cond->addr, cond->prefix_len))
327 				break;
328 			if (entry->family == AF_INET6 &&
329 			    cond->family == AF_INET) {
330 				if (addr[0] == 0 && addr[1] == 0 &&
331 				    addr[2] == htonl(0xffff) &&
332 				    bitstring_match(addr+3, cond->addr, cond->prefix_len))
333 					break;
334 			}
335 			yes = 0;
336 			break;
337 		}
338 		}
339 
340 		if (yes) {
341 			len -= op->yes;
342 			bc += op->yes;
343 		} else {
344 			len -= op->no;
345 			bc += op->no;
346 		}
347 	}
348 	return (len == 0);
349 }
350 
351 static int valid_cc(const void *bc, int len, int cc)
352 {
353 	while (len >= 0) {
354 		const struct tcpdiag_bc_op *op = bc;
355 
356 		if (cc > len)
357 			return 0;
358 		if (cc == len)
359 			return 1;
360 		if (op->yes < 4)
361 			return 0;
362 		len -= op->yes;
363 		bc  += op->yes;
364 	}
365 	return 0;
366 }
367 
368 static int tcpdiag_bc_audit(const void *bytecode, int bytecode_len)
369 {
370 	const unsigned char *bc = bytecode;
371 	int  len = bytecode_len;
372 
373 	while (len > 0) {
374 		struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op*)bc;
375 
376 //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len);
377 		switch (op->code) {
378 		case TCPDIAG_BC_AUTO:
379 		case TCPDIAG_BC_S_COND:
380 		case TCPDIAG_BC_D_COND:
381 		case TCPDIAG_BC_S_GE:
382 		case TCPDIAG_BC_S_LE:
383 		case TCPDIAG_BC_D_GE:
384 		case TCPDIAG_BC_D_LE:
385 			if (op->yes < 4 || op->yes > len+4)
386 				return -EINVAL;
387 		case TCPDIAG_BC_JMP:
388 			if (op->no < 4 || op->no > len+4)
389 				return -EINVAL;
390 			if (op->no < len &&
391 			    !valid_cc(bytecode, bytecode_len, len-op->no))
392 				return -EINVAL;
393 			break;
394 		case TCPDIAG_BC_NOP:
395 			if (op->yes < 4 || op->yes > len+4)
396 				return -EINVAL;
397 			break;
398 		default:
399 			return -EINVAL;
400 		}
401 		bc += op->yes;
402 		len -= op->yes;
403 	}
404 	return len == 0 ? 0 : -EINVAL;
405 }
406 
407 static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk,
408 			     struct netlink_callback *cb)
409 {
410 	struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
411 
412 	if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
413 		struct tcpdiag_entry entry;
414 		struct rtattr *bc = (struct rtattr *)(r + 1);
415 		struct inet_sock *inet = inet_sk(sk);
416 
417 		entry.family = sk->sk_family;
418 #ifdef CONFIG_IP_TCPDIAG_IPV6
419 		if (entry.family == AF_INET6) {
420 			struct ipv6_pinfo *np = inet6_sk(sk);
421 
422 			entry.saddr = np->rcv_saddr.s6_addr32;
423 			entry.daddr = np->daddr.s6_addr32;
424 		} else
425 #endif
426 		{
427 			entry.saddr = &inet->rcv_saddr;
428 			entry.daddr = &inet->daddr;
429 		}
430 		entry.sport = inet->num;
431 		entry.dport = ntohs(inet->dport);
432 		entry.userlocks = sk->sk_userlocks;
433 
434 		if (!tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
435 			return 0;
436 	}
437 
438 	return tcpdiag_fill(skb, sk, r->tcpdiag_ext, NETLINK_CB(cb->skb).pid,
439 			    cb->nlh->nlmsg_seq, NLM_F_MULTI);
440 }
441 
442 static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
443 			    struct request_sock *req,
444 			    u32 pid, u32 seq)
445 {
446 	const struct inet_request_sock *ireq = inet_rsk(req);
447 	struct inet_sock *inet = inet_sk(sk);
448 	unsigned char *b = skb->tail;
449 	struct tcpdiagmsg *r;
450 	struct nlmsghdr *nlh;
451 	long tmo;
452 
453 	nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r));
454 	nlh->nlmsg_flags = NLM_F_MULTI;
455 	r = NLMSG_DATA(nlh);
456 
457 	r->tcpdiag_family = sk->sk_family;
458 	r->tcpdiag_state = TCP_SYN_RECV;
459 	r->tcpdiag_timer = 1;
460 	r->tcpdiag_retrans = req->retrans;
461 
462 	r->id.tcpdiag_if = sk->sk_bound_dev_if;
463 	r->id.tcpdiag_cookie[0] = (u32)(unsigned long)req;
464 	r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1);
465 
466 	tmo = req->expires - jiffies;
467 	if (tmo < 0)
468 		tmo = 0;
469 
470 	r->id.tcpdiag_sport = inet->sport;
471 	r->id.tcpdiag_dport = ireq->rmt_port;
472 	r->id.tcpdiag_src[0] = ireq->loc_addr;
473 	r->id.tcpdiag_dst[0] = ireq->rmt_addr;
474 	r->tcpdiag_expires = jiffies_to_msecs(tmo),
475 	r->tcpdiag_rqueue = 0;
476 	r->tcpdiag_wqueue = 0;
477 	r->tcpdiag_uid = sock_i_uid(sk);
478 	r->tcpdiag_inode = 0;
479 #ifdef CONFIG_IP_TCPDIAG_IPV6
480 	if (r->tcpdiag_family == AF_INET6) {
481 		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
482 			       &tcp6_rsk(req)->loc_addr);
483 		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
484 			       &tcp6_rsk(req)->rmt_addr);
485 	}
486 #endif
487 	nlh->nlmsg_len = skb->tail - b;
488 
489 	return skb->len;
490 
491 nlmsg_failure:
492 	skb_trim(skb, b - skb->data);
493 	return -1;
494 }
495 
496 static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
497 			     struct netlink_callback *cb)
498 {
499 	struct tcpdiag_entry entry;
500 	struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
501 	struct inet_connection_sock *icsk = inet_csk(sk);
502 	struct listen_sock *lopt;
503 	struct rtattr *bc = NULL;
504 	struct inet_sock *inet = inet_sk(sk);
505 	int j, s_j;
506 	int reqnum, s_reqnum;
507 	int err = 0;
508 
509 	s_j = cb->args[3];
510 	s_reqnum = cb->args[4];
511 
512 	if (s_j > 0)
513 		s_j--;
514 
515 	entry.family = sk->sk_family;
516 
517 	read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
518 
519 	lopt = icsk->icsk_accept_queue.listen_opt;
520 	if (!lopt || !lopt->qlen)
521 		goto out;
522 
523 	if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
524 		bc = (struct rtattr *)(r + 1);
525 		entry.sport = inet->num;
526 		entry.userlocks = sk->sk_userlocks;
527 	}
528 
529 	for (j = s_j; j < TCP_SYNQ_HSIZE; j++) {
530 		struct request_sock *req, *head = lopt->syn_table[j];
531 
532 		reqnum = 0;
533 		for (req = head; req; reqnum++, req = req->dl_next) {
534 			struct inet_request_sock *ireq = inet_rsk(req);
535 
536 			if (reqnum < s_reqnum)
537 				continue;
538 			if (r->id.tcpdiag_dport != ireq->rmt_port &&
539 			    r->id.tcpdiag_dport)
540 				continue;
541 
542 			if (bc) {
543 				entry.saddr =
544 #ifdef CONFIG_IP_TCPDIAG_IPV6
545 					(entry.family == AF_INET6) ?
546 					tcp6_rsk(req)->loc_addr.s6_addr32 :
547 #endif
548 					&ireq->loc_addr;
549 				entry.daddr =
550 #ifdef CONFIG_IP_TCPDIAG_IPV6
551 					(entry.family == AF_INET6) ?
552 					tcp6_rsk(req)->rmt_addr.s6_addr32 :
553 #endif
554 					&ireq->rmt_addr;
555 				entry.dport = ntohs(ireq->rmt_port);
556 
557 				if (!tcpdiag_bc_run(RTA_DATA(bc),
558 						    RTA_PAYLOAD(bc), &entry))
559 					continue;
560 			}
561 
562 			err = tcpdiag_fill_req(skb, sk, req,
563 					       NETLINK_CB(cb->skb).pid,
564 					       cb->nlh->nlmsg_seq);
565 			if (err < 0) {
566 				cb->args[3] = j + 1;
567 				cb->args[4] = reqnum;
568 				goto out;
569 			}
570 		}
571 
572 		s_reqnum = 0;
573 	}
574 
575 out:
576 	read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
577 
578 	return err;
579 }
580 
581 static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
582 {
583 	int i, num;
584 	int s_i, s_num;
585 	struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
586 
587 	s_i = cb->args[1];
588 	s_num = num = cb->args[2];
589 
590 	if (cb->args[0] == 0) {
591 		if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV)))
592 			goto skip_listen_ht;
593 		inet_listen_lock(&tcp_hashinfo);
594 		for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
595 			struct sock *sk;
596 			struct hlist_node *node;
597 
598 			num = 0;
599 			sk_for_each(sk, node, &tcp_hashinfo.listening_hash[i]) {
600 				struct inet_sock *inet = inet_sk(sk);
601 
602 				if (num < s_num) {
603 					num++;
604 					continue;
605 				}
606 
607 				if (r->id.tcpdiag_sport != inet->sport &&
608 				    r->id.tcpdiag_sport)
609 					goto next_listen;
610 
611 				if (!(r->tcpdiag_states&TCPF_LISTEN) ||
612 				    r->id.tcpdiag_dport ||
613 				    cb->args[3] > 0)
614 					goto syn_recv;
615 
616 				if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
617 					inet_listen_unlock(&tcp_hashinfo);
618 					goto done;
619 				}
620 
621 syn_recv:
622 				if (!(r->tcpdiag_states&TCPF_SYN_RECV))
623 					goto next_listen;
624 
625 				if (tcpdiag_dump_reqs(skb, sk, cb) < 0) {
626 					inet_listen_unlock(&tcp_hashinfo);
627 					goto done;
628 				}
629 
630 next_listen:
631 				cb->args[3] = 0;
632 				cb->args[4] = 0;
633 				++num;
634 			}
635 
636 			s_num = 0;
637 			cb->args[3] = 0;
638 			cb->args[4] = 0;
639 		}
640 		inet_listen_unlock(&tcp_hashinfo);
641 skip_listen_ht:
642 		cb->args[0] = 1;
643 		s_i = num = s_num = 0;
644 	}
645 
646 	if (!(r->tcpdiag_states&~(TCPF_LISTEN|TCPF_SYN_RECV)))
647 		return skb->len;
648 
649 	for (i = s_i; i < tcp_hashinfo.ehash_size; i++) {
650 		struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[i];
651 		struct sock *sk;
652 		struct hlist_node *node;
653 
654 		if (i > s_i)
655 			s_num = 0;
656 
657 		read_lock_bh(&head->lock);
658 
659 		num = 0;
660 		sk_for_each(sk, node, &head->chain) {
661 			struct inet_sock *inet = inet_sk(sk);
662 
663 			if (num < s_num)
664 				goto next_normal;
665 			if (!(r->tcpdiag_states & (1 << sk->sk_state)))
666 				goto next_normal;
667 			if (r->id.tcpdiag_sport != inet->sport &&
668 			    r->id.tcpdiag_sport)
669 				goto next_normal;
670 			if (r->id.tcpdiag_dport != inet->dport && r->id.tcpdiag_dport)
671 				goto next_normal;
672 			if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
673 				read_unlock_bh(&head->lock);
674 				goto done;
675 			}
676 next_normal:
677 			++num;
678 		}
679 
680 		if (r->tcpdiag_states&TCPF_TIME_WAIT) {
681 			sk_for_each(sk, node,
682 				    &tcp_hashinfo.ehash[i + tcp_hashinfo.ehash_size].chain) {
683 				struct inet_sock *inet = inet_sk(sk);
684 
685 				if (num < s_num)
686 					goto next_dying;
687 				if (r->id.tcpdiag_sport != inet->sport &&
688 				    r->id.tcpdiag_sport)
689 					goto next_dying;
690 				if (r->id.tcpdiag_dport != inet->dport &&
691 				    r->id.tcpdiag_dport)
692 					goto next_dying;
693 				if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
694 					read_unlock_bh(&head->lock);
695 					goto done;
696 				}
697 next_dying:
698 				++num;
699 			}
700 		}
701 		read_unlock_bh(&head->lock);
702 	}
703 
704 done:
705 	cb->args[1] = i;
706 	cb->args[2] = num;
707 	return skb->len;
708 }
709 
/*
 * tcpdiag_dump_done - completion callback passed to netlink_dump_start()
 * @cb: dump state (unused)
 *
 * There is nothing to release; always returns 0.
 */
static int tcpdiag_dump_done(struct netlink_callback *cb)
{
	(void)cb;

	return 0;
}
714 
715 
716 static __inline__ int
717 tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
718 {
719 	if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
720 		return 0;
721 
722 	if (nlh->nlmsg_type != TCPDIAG_GETSOCK)
723 		goto err_inval;
724 
725 	if (NLMSG_LENGTH(sizeof(struct tcpdiagreq)) > skb->len)
726 		goto err_inval;
727 
728 	if (nlh->nlmsg_flags&NLM_F_DUMP) {
729 		if (nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(struct tcpdiagreq))) {
730 			struct rtattr *rta = (struct rtattr*)(NLMSG_DATA(nlh) + sizeof(struct tcpdiagreq));
731 			if (rta->rta_type != TCPDIAG_REQ_BYTECODE ||
732 			    rta->rta_len < 8 ||
733 			    rta->rta_len > nlh->nlmsg_len - NLMSG_SPACE(sizeof(struct tcpdiagreq)))
734 				goto err_inval;
735 			if (tcpdiag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta)))
736 				goto err_inval;
737 		}
738 		return netlink_dump_start(tcpnl, skb, nlh,
739 					  tcpdiag_dump,
740 					  tcpdiag_dump_done);
741 	} else {
742 		return tcpdiag_get_exact(skb, nlh);
743 	}
744 
745 err_inval:
746 	return -EINVAL;
747 }
748 
749 
750 static inline void tcpdiag_rcv_skb(struct sk_buff *skb)
751 {
752 	int err;
753 	struct nlmsghdr * nlh;
754 
755 	if (skb->len >= NLMSG_SPACE(0)) {
756 		nlh = (struct nlmsghdr *)skb->data;
757 		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
758 			return;
759 		err = tcpdiag_rcv_msg(skb, nlh);
760 		if (err || nlh->nlmsg_flags & NLM_F_ACK)
761 			netlink_ack(skb, nlh, err);
762 	}
763 }
764 
765 static void tcpdiag_rcv(struct sock *sk, int len)
766 {
767 	struct sk_buff *skb;
768 	unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
769 
770 	while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) {
771 		tcpdiag_rcv_skb(skb);
772 		kfree_skb(skb);
773 	}
774 }
775 
776 static int __init tcpdiag_init(void)
777 {
778 	tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv,
779 				      THIS_MODULE);
780 	if (tcpnl == NULL)
781 		return -ENOMEM;
782 	return 0;
783 }
784 
785 static void __exit tcpdiag_exit(void)
786 {
787 	sock_release(tcpnl->sk_socket);
788 }
789 
790 module_init(tcpdiag_init);
791 module_exit(tcpdiag_exit);
792 MODULE_LICENSE("GPL");
793