xref: /openbmc/linux/net/ipv4/tcp_diag.c (revision e41aac41)
1 /*
2  * tcp_diag.c	Module for monitoring TCP sockets.
3  *
4  * Version:	$Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
5  *
6  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 #include <linux/config.h>
15 #include <linux/module.h>
16 #include <linux/types.h>
17 #include <linux/fcntl.h>
18 #include <linux/random.h>
19 #include <linux/cache.h>
20 #include <linux/init.h>
21 #include <linux/time.h>
22 
23 #include <net/icmp.h>
24 #include <net/tcp.h>
25 #include <net/ipv6.h>
26 #include <net/inet_common.h>
27 
28 #include <linux/inet.h>
29 #include <linux/stddef.h>
30 
31 #include <linux/tcp_diag.h>
32 
33 struct tcpdiag_entry
34 {
35 	u32 *saddr;
36 	u32 *daddr;
37 	u16 sport;
38 	u16 dport;
39 	u16 family;
40 	u16 userlocks;
41 };
42 
/* Kernel-side netlink socket; created in tcpdiag_init(). */
static struct sock *tcpnl;

/*
 * Reserve an attribute of @attrlen bytes in @skb and evaluate to a pointer
 * to its payload.
 * NOTE(review): presumably __RTA_PUT jumps to a local "rtattr_failure"
 * label when the buffer is full -- confirm against its definition.
 */
#define TCPDIAG_PUT(skb, attrtype, attrlen) \
	RTA_DATA(__RTA_PUT((skb), (attrtype), (attrlen)))

#ifdef CONFIG_IP_TCPDIAG_DCCP
/* Hash tables searched instead of tcp_hashinfo for DCCPDIAG_GETSOCK. */
extern struct inet_hashinfo dccp_hashinfo;
#endif
51 
52 static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
53 			int ext, u32 pid, u32 seq, u16 nlmsg_flags,
54 			const struct nlmsghdr *unlh)
55 {
56 	const struct inet_sock *inet = inet_sk(sk);
57 	const struct inet_connection_sock *icsk = inet_csk(sk);
58 	struct tcpdiagmsg *r;
59 	struct nlmsghdr  *nlh;
60 	struct tcp_info  *info = NULL;
61 	struct tcpdiag_meminfo  *minfo = NULL;
62 	unsigned char	 *b = skb->tail;
63 
64 	nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r));
65 	nlh->nlmsg_flags = nlmsg_flags;
66 	r = NLMSG_DATA(nlh);
67 	if (sk->sk_state != TCP_TIME_WAIT) {
68 		if (ext & (1<<(TCPDIAG_MEMINFO-1)))
69 			minfo = TCPDIAG_PUT(skb, TCPDIAG_MEMINFO, sizeof(*minfo));
70 		if (ext & (1<<(TCPDIAG_INFO-1)))
71 			info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info));
72 
73 		if ((ext & (1 << (TCPDIAG_CONG - 1))) && icsk->icsk_ca_ops) {
74 			size_t len = strlen(icsk->icsk_ca_ops->name);
75 			strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len+1),
76 			       icsk->icsk_ca_ops->name);
77 		}
78 	}
79 	r->tcpdiag_family = sk->sk_family;
80 	r->tcpdiag_state = sk->sk_state;
81 	r->tcpdiag_timer = 0;
82 	r->tcpdiag_retrans = 0;
83 
84 	r->id.tcpdiag_if = sk->sk_bound_dev_if;
85 	r->id.tcpdiag_cookie[0] = (u32)(unsigned long)sk;
86 	r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
87 
88 	if (r->tcpdiag_state == TCP_TIME_WAIT) {
89 		const struct inet_timewait_sock *tw = inet_twsk(sk);
90 		long tmo = tw->tw_ttd - jiffies;
91 		if (tmo < 0)
92 			tmo = 0;
93 
94 		r->id.tcpdiag_sport = tw->tw_sport;
95 		r->id.tcpdiag_dport = tw->tw_dport;
96 		r->id.tcpdiag_src[0] = tw->tw_rcv_saddr;
97 		r->id.tcpdiag_dst[0] = tw->tw_daddr;
98 		r->tcpdiag_state = tw->tw_substate;
99 		r->tcpdiag_timer = 3;
100 		r->tcpdiag_expires = (tmo*1000+HZ-1)/HZ;
101 		r->tcpdiag_rqueue = 0;
102 		r->tcpdiag_wqueue = 0;
103 		r->tcpdiag_uid = 0;
104 		r->tcpdiag_inode = 0;
105 #ifdef CONFIG_IP_TCPDIAG_IPV6
106 		if (r->tcpdiag_family == AF_INET6) {
107 			const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
108 
109 			ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
110 				       &tcp6tw->tw_v6_rcv_saddr);
111 			ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
112 				       &tcp6tw->tw_v6_daddr);
113 		}
114 #endif
115 		nlh->nlmsg_len = skb->tail - b;
116 		return skb->len;
117 	}
118 
119 	r->id.tcpdiag_sport = inet->sport;
120 	r->id.tcpdiag_dport = inet->dport;
121 	r->id.tcpdiag_src[0] = inet->rcv_saddr;
122 	r->id.tcpdiag_dst[0] = inet->daddr;
123 
124 #ifdef CONFIG_IP_TCPDIAG_IPV6
125 	if (r->tcpdiag_family == AF_INET6) {
126 		struct ipv6_pinfo *np = inet6_sk(sk);
127 
128 		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
129 			       &np->rcv_saddr);
130 		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
131 			       &np->daddr);
132 	}
133 #endif
134 
135 #define EXPIRES_IN_MS(tmo)  ((tmo-jiffies)*1000+HZ-1)/HZ
136 
137 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
138 		r->tcpdiag_timer = 1;
139 		r->tcpdiag_retrans = icsk->icsk_retransmits;
140 		r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
141 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
142 		r->tcpdiag_timer = 4;
143 		r->tcpdiag_retrans = icsk->icsk_probes_out;
144 		r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
145 	} else if (timer_pending(&sk->sk_timer)) {
146 		r->tcpdiag_timer = 2;
147 		r->tcpdiag_retrans = icsk->icsk_probes_out;
148 		r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires);
149 	} else {
150 		r->tcpdiag_timer = 0;
151 		r->tcpdiag_expires = 0;
152 	}
153 #undef EXPIRES_IN_MS
154 	/*
155 	 * Ahem... for now we'll have some knowledge about TCP -acme
156 	 * But this is just one of two small exceptions, both in this
157 	 * function, so lets close our eyes for some 15 lines or so... 8)
158 	 * -acme
159 	 */
160 	if (sk->sk_protocol == IPPROTO_TCP) {
161 		const struct tcp_sock *tp = tcp_sk(sk);
162 
163 		r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq;
164 		r->tcpdiag_wqueue = tp->write_seq - tp->snd_una;
165 	} else
166 		r->tcpdiag_rqueue = r->tcpdiag_wqueue = 0;
167 
168 	r->tcpdiag_uid = sock_i_uid(sk);
169 	r->tcpdiag_inode = sock_i_ino(sk);
170 
171 	if (minfo) {
172 		minfo->tcpdiag_rmem = atomic_read(&sk->sk_rmem_alloc);
173 		minfo->tcpdiag_wmem = sk->sk_wmem_queued;
174 		minfo->tcpdiag_fmem = sk->sk_forward_alloc;
175 		minfo->tcpdiag_tmem = atomic_read(&sk->sk_wmem_alloc);
176 	}
177 
178 	/* Ahem... for now we'll have some knowledge about TCP -acme */
179 	if (info) {
180 		if (sk->sk_protocol == IPPROTO_TCP)
181 			tcp_get_info(sk, info);
182 		else
183 			memset(info, 0, sizeof(*info));
184 	}
185 
186 	if (sk->sk_state < TCP_TIME_WAIT &&
187 	    icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
188 		icsk->icsk_ca_ops->get_info(sk, ext, skb);
189 
190 	nlh->nlmsg_len = skb->tail - b;
191 	return skb->len;
192 
193 rtattr_failure:
194 nlmsg_failure:
195 	skb_trim(skb, b - skb->data);
196 	return -1;
197 }
198 
199 #ifdef CONFIG_IP_TCPDIAG_IPV6
200 extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
201 				  struct in6_addr *daddr, u16 dport,
202 				  int dif);
203 #else
204 static inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
205 					 struct in6_addr *daddr, u16 dport,
206 					 int dif)
207 {
208 	return NULL;
209 }
210 #endif
211 
212 static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh)
213 {
214 	int err;
215 	struct sock *sk;
216 	struct tcpdiagreq *req = NLMSG_DATA(nlh);
217 	struct sk_buff *rep;
218 	struct inet_hashinfo *hashinfo = &tcp_hashinfo;
219 #ifdef CONFIG_IP_TCPDIAG_DCCP
220 	if (nlh->nlmsg_type == DCCPDIAG_GETSOCK)
221 		hashinfo = &dccp_hashinfo;
222 #endif
223 	if (req->tcpdiag_family == AF_INET) {
224 		sk = inet_lookup(hashinfo, req->id.tcpdiag_dst[0],
225 				 req->id.tcpdiag_dport, req->id.tcpdiag_src[0],
226 				 req->id.tcpdiag_sport, req->id.tcpdiag_if);
227 	}
228 #ifdef CONFIG_IP_TCPDIAG_IPV6
229 	else if (req->tcpdiag_family == AF_INET6) {
230 		sk = tcp_v6_lookup((struct in6_addr*)req->id.tcpdiag_dst, req->id.tcpdiag_dport,
231 				   (struct in6_addr*)req->id.tcpdiag_src, req->id.tcpdiag_sport,
232 				   req->id.tcpdiag_if);
233 	}
234 #endif
235 	else {
236 		return -EINVAL;
237 	}
238 
239 	if (sk == NULL)
240 		return -ENOENT;
241 
242 	err = -ESTALE;
243 	if ((req->id.tcpdiag_cookie[0] != TCPDIAG_NOCOOKIE ||
244 	     req->id.tcpdiag_cookie[1] != TCPDIAG_NOCOOKIE) &&
245 	    ((u32)(unsigned long)sk != req->id.tcpdiag_cookie[0] ||
246 	     (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.tcpdiag_cookie[1]))
247 		goto out;
248 
249 	err = -ENOMEM;
250 	rep = alloc_skb(NLMSG_SPACE(sizeof(struct tcpdiagmsg)+
251 				    sizeof(struct tcpdiag_meminfo)+
252 				    sizeof(struct tcp_info)+64), GFP_KERNEL);
253 	if (!rep)
254 		goto out;
255 
256 	if (tcpdiag_fill(rep, sk, req->tcpdiag_ext,
257 			 NETLINK_CB(in_skb).pid,
258 			 nlh->nlmsg_seq, 0, nlh) <= 0)
259 		BUG();
260 
261 	err = netlink_unicast(tcpnl, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
262 	if (err > 0)
263 		err = 0;
264 
265 out:
266 	if (sk) {
267 		if (sk->sk_state == TCP_TIME_WAIT)
268 			inet_twsk_put((struct inet_timewait_sock *)sk);
269 		else
270 			sock_put(sk);
271 	}
272 	return err;
273 }
274 
275 static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
276 {
277 	int words = bits >> 5;
278 
279 	bits &= 0x1f;
280 
281 	if (words) {
282 		if (memcmp(a1, a2, words << 2))
283 			return 0;
284 	}
285 	if (bits) {
286 		__u32 w1, w2;
287 		__u32 mask;
288 
289 		w1 = a1[words];
290 		w2 = a2[words];
291 
292 		mask = htonl((0xffffffff) << (32 - bits));
293 
294 		if ((w1 ^ w2) & mask)
295 			return 0;
296 	}
297 
298 	return 1;
299 }
300 
301 
302 static int tcpdiag_bc_run(const void *bc, int len,
303 			  const struct tcpdiag_entry *entry)
304 {
305 	while (len > 0) {
306 		int yes = 1;
307 		const struct tcpdiag_bc_op *op = bc;
308 
309 		switch (op->code) {
310 		case TCPDIAG_BC_NOP:
311 			break;
312 		case TCPDIAG_BC_JMP:
313 			yes = 0;
314 			break;
315 		case TCPDIAG_BC_S_GE:
316 			yes = entry->sport >= op[1].no;
317 			break;
318 		case TCPDIAG_BC_S_LE:
319 			yes = entry->dport <= op[1].no;
320 			break;
321 		case TCPDIAG_BC_D_GE:
322 			yes = entry->dport >= op[1].no;
323 			break;
324 		case TCPDIAG_BC_D_LE:
325 			yes = entry->dport <= op[1].no;
326 			break;
327 		case TCPDIAG_BC_AUTO:
328 			yes = !(entry->userlocks & SOCK_BINDPORT_LOCK);
329 			break;
330 		case TCPDIAG_BC_S_COND:
331 		case TCPDIAG_BC_D_COND:
332 		{
333 			struct tcpdiag_hostcond *cond = (struct tcpdiag_hostcond*)(op+1);
334 			u32 *addr;
335 
336 			if (cond->port != -1 &&
337 			    cond->port != (op->code == TCPDIAG_BC_S_COND ?
338 					     entry->sport : entry->dport)) {
339 				yes = 0;
340 				break;
341 			}
342 
343 			if (cond->prefix_len == 0)
344 				break;
345 
346 			if (op->code == TCPDIAG_BC_S_COND)
347 				addr = entry->saddr;
348 			else
349 				addr = entry->daddr;
350 
351 			if (bitstring_match(addr, cond->addr, cond->prefix_len))
352 				break;
353 			if (entry->family == AF_INET6 &&
354 			    cond->family == AF_INET) {
355 				if (addr[0] == 0 && addr[1] == 0 &&
356 				    addr[2] == htonl(0xffff) &&
357 				    bitstring_match(addr+3, cond->addr, cond->prefix_len))
358 					break;
359 			}
360 			yes = 0;
361 			break;
362 		}
363 		}
364 
365 		if (yes) {
366 			len -= op->yes;
367 			bc += op->yes;
368 		} else {
369 			len -= op->no;
370 			bc += op->no;
371 		}
372 	}
373 	return (len == 0);
374 }
375 
376 static int valid_cc(const void *bc, int len, int cc)
377 {
378 	while (len >= 0) {
379 		const struct tcpdiag_bc_op *op = bc;
380 
381 		if (cc > len)
382 			return 0;
383 		if (cc == len)
384 			return 1;
385 		if (op->yes < 4)
386 			return 0;
387 		len -= op->yes;
388 		bc  += op->yes;
389 	}
390 	return 0;
391 }
392 
393 static int tcpdiag_bc_audit(const void *bytecode, int bytecode_len)
394 {
395 	const unsigned char *bc = bytecode;
396 	int  len = bytecode_len;
397 
398 	while (len > 0) {
399 		struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op*)bc;
400 
401 //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len);
402 		switch (op->code) {
403 		case TCPDIAG_BC_AUTO:
404 		case TCPDIAG_BC_S_COND:
405 		case TCPDIAG_BC_D_COND:
406 		case TCPDIAG_BC_S_GE:
407 		case TCPDIAG_BC_S_LE:
408 		case TCPDIAG_BC_D_GE:
409 		case TCPDIAG_BC_D_LE:
410 			if (op->yes < 4 || op->yes > len+4)
411 				return -EINVAL;
412 		case TCPDIAG_BC_JMP:
413 			if (op->no < 4 || op->no > len+4)
414 				return -EINVAL;
415 			if (op->no < len &&
416 			    !valid_cc(bytecode, bytecode_len, len-op->no))
417 				return -EINVAL;
418 			break;
419 		case TCPDIAG_BC_NOP:
420 			if (op->yes < 4 || op->yes > len+4)
421 				return -EINVAL;
422 			break;
423 		default:
424 			return -EINVAL;
425 		}
426 		bc += op->yes;
427 		len -= op->yes;
428 	}
429 	return len == 0 ? 0 : -EINVAL;
430 }
431 
432 static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk,
433 			     struct netlink_callback *cb)
434 {
435 	struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
436 
437 	if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
438 		struct tcpdiag_entry entry;
439 		struct rtattr *bc = (struct rtattr *)(r + 1);
440 		struct inet_sock *inet = inet_sk(sk);
441 
442 		entry.family = sk->sk_family;
443 #ifdef CONFIG_IP_TCPDIAG_IPV6
444 		if (entry.family == AF_INET6) {
445 			struct ipv6_pinfo *np = inet6_sk(sk);
446 
447 			entry.saddr = np->rcv_saddr.s6_addr32;
448 			entry.daddr = np->daddr.s6_addr32;
449 		} else
450 #endif
451 		{
452 			entry.saddr = &inet->rcv_saddr;
453 			entry.daddr = &inet->daddr;
454 		}
455 		entry.sport = inet->num;
456 		entry.dport = ntohs(inet->dport);
457 		entry.userlocks = sk->sk_userlocks;
458 
459 		if (!tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
460 			return 0;
461 	}
462 
463 	return tcpdiag_fill(skb, sk, r->tcpdiag_ext, NETLINK_CB(cb->skb).pid,
464 			    cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
465 }
466 
467 static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
468 			    struct request_sock *req,
469 			    u32 pid, u32 seq,
470 			    const struct nlmsghdr *unlh)
471 {
472 	const struct inet_request_sock *ireq = inet_rsk(req);
473 	struct inet_sock *inet = inet_sk(sk);
474 	unsigned char *b = skb->tail;
475 	struct tcpdiagmsg *r;
476 	struct nlmsghdr *nlh;
477 	long tmo;
478 
479 	nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r));
480 	nlh->nlmsg_flags = NLM_F_MULTI;
481 	r = NLMSG_DATA(nlh);
482 
483 	r->tcpdiag_family = sk->sk_family;
484 	r->tcpdiag_state = TCP_SYN_RECV;
485 	r->tcpdiag_timer = 1;
486 	r->tcpdiag_retrans = req->retrans;
487 
488 	r->id.tcpdiag_if = sk->sk_bound_dev_if;
489 	r->id.tcpdiag_cookie[0] = (u32)(unsigned long)req;
490 	r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1);
491 
492 	tmo = req->expires - jiffies;
493 	if (tmo < 0)
494 		tmo = 0;
495 
496 	r->id.tcpdiag_sport = inet->sport;
497 	r->id.tcpdiag_dport = ireq->rmt_port;
498 	r->id.tcpdiag_src[0] = ireq->loc_addr;
499 	r->id.tcpdiag_dst[0] = ireq->rmt_addr;
500 	r->tcpdiag_expires = jiffies_to_msecs(tmo),
501 	r->tcpdiag_rqueue = 0;
502 	r->tcpdiag_wqueue = 0;
503 	r->tcpdiag_uid = sock_i_uid(sk);
504 	r->tcpdiag_inode = 0;
505 #ifdef CONFIG_IP_TCPDIAG_IPV6
506 	if (r->tcpdiag_family == AF_INET6) {
507 		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
508 			       &tcp6_rsk(req)->loc_addr);
509 		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
510 			       &tcp6_rsk(req)->rmt_addr);
511 	}
512 #endif
513 	nlh->nlmsg_len = skb->tail - b;
514 
515 	return skb->len;
516 
517 nlmsg_failure:
518 	skb_trim(skb, b - skb->data);
519 	return -1;
520 }
521 
522 static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
523 			     struct netlink_callback *cb)
524 {
525 	struct tcpdiag_entry entry;
526 	struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
527 	struct inet_connection_sock *icsk = inet_csk(sk);
528 	struct listen_sock *lopt;
529 	struct rtattr *bc = NULL;
530 	struct inet_sock *inet = inet_sk(sk);
531 	int j, s_j;
532 	int reqnum, s_reqnum;
533 	int err = 0;
534 
535 	s_j = cb->args[3];
536 	s_reqnum = cb->args[4];
537 
538 	if (s_j > 0)
539 		s_j--;
540 
541 	entry.family = sk->sk_family;
542 
543 	read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
544 
545 	lopt = icsk->icsk_accept_queue.listen_opt;
546 	if (!lopt || !lopt->qlen)
547 		goto out;
548 
549 	if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
550 		bc = (struct rtattr *)(r + 1);
551 		entry.sport = inet->num;
552 		entry.userlocks = sk->sk_userlocks;
553 	}
554 
555 	for (j = s_j; j < lopt->nr_table_entries; j++) {
556 		struct request_sock *req, *head = lopt->syn_table[j];
557 
558 		reqnum = 0;
559 		for (req = head; req; reqnum++, req = req->dl_next) {
560 			struct inet_request_sock *ireq = inet_rsk(req);
561 
562 			if (reqnum < s_reqnum)
563 				continue;
564 			if (r->id.tcpdiag_dport != ireq->rmt_port &&
565 			    r->id.tcpdiag_dport)
566 				continue;
567 
568 			if (bc) {
569 				entry.saddr =
570 #ifdef CONFIG_IP_TCPDIAG_IPV6
571 					(entry.family == AF_INET6) ?
572 					tcp6_rsk(req)->loc_addr.s6_addr32 :
573 #endif
574 					&ireq->loc_addr;
575 				entry.daddr =
576 #ifdef CONFIG_IP_TCPDIAG_IPV6
577 					(entry.family == AF_INET6) ?
578 					tcp6_rsk(req)->rmt_addr.s6_addr32 :
579 #endif
580 					&ireq->rmt_addr;
581 				entry.dport = ntohs(ireq->rmt_port);
582 
583 				if (!tcpdiag_bc_run(RTA_DATA(bc),
584 						    RTA_PAYLOAD(bc), &entry))
585 					continue;
586 			}
587 
588 			err = tcpdiag_fill_req(skb, sk, req,
589 					       NETLINK_CB(cb->skb).pid,
590 					       cb->nlh->nlmsg_seq, cb->nlh);
591 			if (err < 0) {
592 				cb->args[3] = j + 1;
593 				cb->args[4] = reqnum;
594 				goto out;
595 			}
596 		}
597 
598 		s_reqnum = 0;
599 	}
600 
601 out:
602 	read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
603 
604 	return err;
605 }
606 
607 static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
608 {
609 	int i, num;
610 	int s_i, s_num;
611 	struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
612 	struct inet_hashinfo *hashinfo;
613 
614 	s_i = cb->args[1];
615 	s_num = num = cb->args[2];
616 		hashinfo = &tcp_hashinfo;
617 #ifdef CONFIG_IP_TCPDIAG_DCCP
618 	if (cb->nlh->nlmsg_type == DCCPDIAG_GETSOCK)
619 		hashinfo = &dccp_hashinfo;
620 #endif
621 	if (cb->args[0] == 0) {
622 		if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV)))
623 			goto skip_listen_ht;
624 
625 		inet_listen_lock(hashinfo);
626 		for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
627 			struct sock *sk;
628 			struct hlist_node *node;
629 
630 			num = 0;
631 			sk_for_each(sk, node, &hashinfo->listening_hash[i]) {
632 				struct inet_sock *inet = inet_sk(sk);
633 
634 				if (num < s_num) {
635 					num++;
636 					continue;
637 				}
638 
639 				if (r->id.tcpdiag_sport != inet->sport &&
640 				    r->id.tcpdiag_sport)
641 					goto next_listen;
642 
643 				if (!(r->tcpdiag_states&TCPF_LISTEN) ||
644 				    r->id.tcpdiag_dport ||
645 				    cb->args[3] > 0)
646 					goto syn_recv;
647 
648 				if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
649 					inet_listen_unlock(hashinfo);
650 					goto done;
651 				}
652 
653 syn_recv:
654 				if (!(r->tcpdiag_states&TCPF_SYN_RECV))
655 					goto next_listen;
656 
657 				if (tcpdiag_dump_reqs(skb, sk, cb) < 0) {
658 					inet_listen_unlock(hashinfo);
659 					goto done;
660 				}
661 
662 next_listen:
663 				cb->args[3] = 0;
664 				cb->args[4] = 0;
665 				++num;
666 			}
667 
668 			s_num = 0;
669 			cb->args[3] = 0;
670 			cb->args[4] = 0;
671 		}
672 		inet_listen_unlock(hashinfo);
673 skip_listen_ht:
674 		cb->args[0] = 1;
675 		s_i = num = s_num = 0;
676 	}
677 
678 	if (!(r->tcpdiag_states&~(TCPF_LISTEN|TCPF_SYN_RECV)))
679 		return skb->len;
680 
681 	for (i = s_i; i < hashinfo->ehash_size; i++) {
682 		struct inet_ehash_bucket *head = &hashinfo->ehash[i];
683 		struct sock *sk;
684 		struct hlist_node *node;
685 
686 		if (i > s_i)
687 			s_num = 0;
688 
689 		read_lock_bh(&head->lock);
690 
691 		num = 0;
692 		sk_for_each(sk, node, &head->chain) {
693 			struct inet_sock *inet = inet_sk(sk);
694 
695 			if (num < s_num)
696 				goto next_normal;
697 			if (!(r->tcpdiag_states & (1 << sk->sk_state)))
698 				goto next_normal;
699 			if (r->id.tcpdiag_sport != inet->sport &&
700 			    r->id.tcpdiag_sport)
701 				goto next_normal;
702 			if (r->id.tcpdiag_dport != inet->dport && r->id.tcpdiag_dport)
703 				goto next_normal;
704 			if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
705 				read_unlock_bh(&head->lock);
706 				goto done;
707 			}
708 next_normal:
709 			++num;
710 		}
711 
712 		if (r->tcpdiag_states&TCPF_TIME_WAIT) {
713 			sk_for_each(sk, node,
714 				    &hashinfo->ehash[i + hashinfo->ehash_size].chain) {
715 				struct inet_sock *inet = inet_sk(sk);
716 
717 				if (num < s_num)
718 					goto next_dying;
719 				if (r->id.tcpdiag_sport != inet->sport &&
720 				    r->id.tcpdiag_sport)
721 					goto next_dying;
722 				if (r->id.tcpdiag_dport != inet->dport &&
723 				    r->id.tcpdiag_dport)
724 					goto next_dying;
725 				if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
726 					read_unlock_bh(&head->lock);
727 					goto done;
728 				}
729 next_dying:
730 				++num;
731 			}
732 		}
733 		read_unlock_bh(&head->lock);
734 	}
735 
736 done:
737 	cb->args[1] = i;
738 	cb->args[2] = num;
739 	return skb->len;
740 }
741 
/*
 * Completion callback handed to netlink_dump_start(); the dump keeps no
 * state that needs releasing, so this only reports success.
 */
static int tcpdiag_dump_done(struct netlink_callback *cb)
{
	return 0;
}
746 
747 
748 static __inline__ int
749 tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
750 {
751 	if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
752 		return 0;
753 
754 	if (nlh->nlmsg_type != TCPDIAG_GETSOCK
755 #ifdef CONFIG_IP_TCPDIAG_DCCP
756 	    && nlh->nlmsg_type != DCCPDIAG_GETSOCK
757 #endif
758 	   )
759 		goto err_inval;
760 
761 	if (NLMSG_LENGTH(sizeof(struct tcpdiagreq)) > skb->len)
762 		goto err_inval;
763 
764 	if (nlh->nlmsg_flags&NLM_F_DUMP) {
765 		if (nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(struct tcpdiagreq))) {
766 			struct rtattr *rta = (struct rtattr*)(NLMSG_DATA(nlh) + sizeof(struct tcpdiagreq));
767 			if (rta->rta_type != TCPDIAG_REQ_BYTECODE ||
768 			    rta->rta_len < 8 ||
769 			    rta->rta_len > nlh->nlmsg_len - NLMSG_SPACE(sizeof(struct tcpdiagreq)))
770 				goto err_inval;
771 			if (tcpdiag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta)))
772 				goto err_inval;
773 		}
774 		return netlink_dump_start(tcpnl, skb, nlh,
775 					  tcpdiag_dump,
776 					  tcpdiag_dump_done);
777 	} else {
778 		return tcpdiag_get_exact(skb, nlh);
779 	}
780 
781 err_inval:
782 	return -EINVAL;
783 }
784 
785 
786 static inline void tcpdiag_rcv_skb(struct sk_buff *skb)
787 {
788 	int err;
789 	struct nlmsghdr * nlh;
790 
791 	if (skb->len >= NLMSG_SPACE(0)) {
792 		nlh = (struct nlmsghdr *)skb->data;
793 		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
794 			return;
795 		err = tcpdiag_rcv_msg(skb, nlh);
796 		if (err || nlh->nlmsg_flags & NLM_F_ACK)
797 			netlink_ack(skb, nlh, err);
798 	}
799 }
800 
801 static void tcpdiag_rcv(struct sock *sk, int len)
802 {
803 	struct sk_buff *skb;
804 	unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
805 
806 	while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) {
807 		tcpdiag_rcv_skb(skb);
808 		kfree_skb(skb);
809 	}
810 }
811 
812 static int __init tcpdiag_init(void)
813 {
814 	tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv,
815 				      THIS_MODULE);
816 	if (tcpnl == NULL)
817 		return -ENOMEM;
818 	return 0;
819 }
820 
821 static void __exit tcpdiag_exit(void)
822 {
823 	sock_release(tcpnl->sk_socket);
824 }
825 
826 module_init(tcpdiag_init);
827 module_exit(tcpdiag_exit);
828 MODULE_LICENSE("GPL");
829