xref: /openbmc/linux/net/dccp/output.c (revision d5cb9783536a41df9f9cba5b0a1d78047ed787f7)
1 /*
2  *  net/dccp/output.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *	This program is free software; you can redistribute it and/or
8  *	modify it under the terms of the GNU General Public License
9  *	as published by the Free Software Foundation; either version
10  *	2 of the License, or (at your option) any later version.
11  */
12 
13 #include <linux/config.h>
14 #include <linux/dccp.h>
15 #include <linux/kernel.h>
16 #include <linux/skbuff.h>
17 
18 #include <net/sock.h>
19 
20 #include "ackvec.h"
21 #include "ccid.h"
22 #include "dccp.h"
23 
24 static inline void dccp_event_ack_sent(struct sock *sk)
25 {
26 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
27 }
28 
29 static inline void dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
30 {
31 	skb_set_owner_w(skb, sk);
32 	WARN_ON(sk->sk_send_head);
33 	sk->sk_send_head = skb;
34 }
35 
36 /*
37  * All SKB's seen here are completely headerless. It is our
38  * job to build the DCCP header, and pass the packet down to
39  * IP so it can do the same plus pass the packet off to the
40  * device.
41  */
42 static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
43 {
44 	if (likely(skb != NULL)) {
45 		const struct inet_sock *inet = inet_sk(sk);
46 		struct dccp_sock *dp = dccp_sk(sk);
47 		struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
48 		struct dccp_hdr *dh;
49 		/* XXX For now we're using only 48 bits sequence numbers */
50 		const int dccp_header_size = sizeof(*dh) +
51 					     sizeof(struct dccp_hdr_ext) +
52 					  dccp_packet_hdr_len(dcb->dccpd_type);
53 		int err, set_ack = 1;
54 		u64 ackno = dp->dccps_gsr;
55 
56 		dccp_inc_seqno(&dp->dccps_gss);
57 
58 		switch (dcb->dccpd_type) {
59 		case DCCP_PKT_DATA:
60 			set_ack = 0;
61 			/* fall through */
62 		case DCCP_PKT_DATAACK:
63 			break;
64 
65 		case DCCP_PKT_SYNC:
66 		case DCCP_PKT_SYNCACK:
67 			ackno = dcb->dccpd_seq;
68 			/* fall through */
69 		default:
70 			/*
71 			 * Only data packets should come through with skb->sk
72 			 * set.
73 			 */
74 			WARN_ON(skb->sk);
75 			skb_set_owner_w(skb, sk);
76 			break;
77 		}
78 
79 		dcb->dccpd_seq = dp->dccps_gss;
80 		dccp_insert_options(sk, skb);
81 
82 		skb->h.raw = skb_push(skb, dccp_header_size);
83 		dh = dccp_hdr(skb);
84 
85 		/* Build DCCP header and checksum it. */
86 		memset(dh, 0, dccp_header_size);
87 		dh->dccph_type	= dcb->dccpd_type;
88 		dh->dccph_sport	= inet->sport;
89 		dh->dccph_dport	= inet->dport;
90 		dh->dccph_doff	= (dccp_header_size + dcb->dccpd_opt_len) / 4;
91 		dh->dccph_ccval	= dcb->dccpd_ccval;
92 		/* XXX For now we're using only 48 bits sequence numbers */
93 		dh->dccph_x	= 1;
94 
95 		dp->dccps_awh = dp->dccps_gss;
96 		dccp_hdr_set_seq(dh, dp->dccps_gss);
97 		if (set_ack)
98 			dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);
99 
100 		switch (dcb->dccpd_type) {
101 		case DCCP_PKT_REQUEST:
102 			dccp_hdr_request(skb)->dccph_req_service =
103 							dp->dccps_service;
104 			break;
105 		case DCCP_PKT_RESET:
106 			dccp_hdr_reset(skb)->dccph_reset_code =
107 							dcb->dccpd_reset_code;
108 			break;
109 		}
110 
111 		dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr,
112 						      inet->daddr);
113 
114 		if (set_ack)
115 			dccp_event_ack_sent(sk);
116 
117 		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
118 
119 		memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
120 		err = ip_queue_xmit(skb, 0);
121 		if (err <= 0)
122 			return err;
123 
124 		/* NET_XMIT_CN is special. It does not guarantee,
125 		 * that this packet is lost. It tells that device
126 		 * is about to start to drop packets or already
127 		 * drops some packets of the same priority and
128 		 * invokes us to send less aggressively.
129 		 */
130 		return err == NET_XMIT_CN ? 0 : err;
131 	}
132 	return -ENOBUFS;
133 }
134 
135 unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
136 {
137 	struct dccp_sock *dp = dccp_sk(sk);
138 	int mss_now;
139 
140 	/*
141 	 * FIXME: we really should be using the af_specific thing to support
142 	 * 	  IPv6.
143 	 * mss_now = pmtu - tp->af_specific->net_header_len -
144 	 * 	     sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);
145 	 */
146 	mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) -
147 		  sizeof(struct dccp_hdr_ext);
148 
149 	/* Now subtract optional transport overhead */
150 	mss_now -= dp->dccps_ext_header_len;
151 
152 	/*
153 	 * FIXME: this should come from the CCID infrastructure, where, say,
154 	 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets
155 	 * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
156 	 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
157 	 * make it a multiple of 4
158 	 */
159 
160 	mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
161 
162 	/* And store cached results */
163 	dp->dccps_pmtu_cookie = pmtu;
164 	dp->dccps_mss_cache = mss_now;
165 
166 	return mss_now;
167 }
168 
169 void dccp_write_space(struct sock *sk)
170 {
171 	read_lock(&sk->sk_callback_lock);
172 
173 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
174 		wake_up_interruptible(sk->sk_sleep);
175 	/* Should agree with poll, otherwise some programs break */
176 	if (sock_writeable(sk))
177 		sk_wake_async(sk, 2, POLL_OUT);
178 
179 	read_unlock(&sk->sk_callback_lock);
180 }
181 
182 /**
183  * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
184  * @sk: socket to wait for
185  * @timeo: for how long
186  */
187 static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
188 			      long *timeo)
189 {
190 	struct dccp_sock *dp = dccp_sk(sk);
191 	DEFINE_WAIT(wait);
192 	long delay;
193 	int rc;
194 
195 	while (1) {
196 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
197 
198 		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
199 			goto do_error;
200 		if (!*timeo)
201 			goto do_nonblock;
202 		if (signal_pending(current))
203 			goto do_interrupted;
204 
205 		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
206 					    skb->len);
207 		if (rc <= 0)
208 			break;
209 		delay = msecs_to_jiffies(rc);
210 		if (delay > *timeo || delay < 0)
211 			goto do_nonblock;
212 
213 		sk->sk_write_pending++;
214 		release_sock(sk);
215 		*timeo -= schedule_timeout(delay);
216 		lock_sock(sk);
217 		sk->sk_write_pending--;
218 	}
219 out:
220 	finish_wait(sk->sk_sleep, &wait);
221 	return rc;
222 
223 do_error:
224 	rc = -EPIPE;
225 	goto out;
226 do_nonblock:
227 	rc = -EAGAIN;
228 	goto out;
229 do_interrupted:
230 	rc = sock_intr_errno(*timeo);
231 	goto out;
232 }
233 
234 int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
235 {
236 	const struct dccp_sock *dp = dccp_sk(sk);
237 	int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
238 					 skb->len);
239 
240 	if (err > 0)
241 		err = dccp_wait_for_ccid(sk, skb, timeo);
242 
243 	if (err == 0) {
244 		struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
245 		const int len = skb->len;
246 
247 		if (sk->sk_state == DCCP_PARTOPEN) {
248 			/* See 8.1.5.  Handshake Completion */
249 			inet_csk_schedule_ack(sk);
250 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
251 						  inet_csk(sk)->icsk_rto,
252 						  DCCP_RTO_MAX);
253 			dcb->dccpd_type = DCCP_PKT_DATAACK;
254 		} else if (dccp_ack_pending(sk))
255 			dcb->dccpd_type = DCCP_PKT_DATAACK;
256 		else
257 			dcb->dccpd_type = DCCP_PKT_DATA;
258 
259 		err = dccp_transmit_skb(sk, skb);
260 		ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
261 	} else
262 		kfree_skb(skb);
263 
264 	return err;
265 }
266 
267 int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
268 {
269 	if (inet_sk_rebuild_header(sk) != 0)
270 		return -EHOSTUNREACH; /* Routing failure or similar. */
271 
272 	return dccp_transmit_skb(sk, (skb_cloned(skb) ?
273 				      pskb_copy(skb, GFP_ATOMIC):
274 				      skb_clone(skb, GFP_ATOMIC)));
275 }
276 
277 struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
278 				   struct request_sock *req)
279 {
280 	struct dccp_hdr *dh;
281 	struct dccp_request_sock *dreq;
282 	const int dccp_header_size = sizeof(struct dccp_hdr) +
283 				     sizeof(struct dccp_hdr_ext) +
284 				     sizeof(struct dccp_hdr_response);
285 	struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
286 					       dccp_header_size, 1,
287 					   GFP_ATOMIC);
288 	if (skb == NULL)
289 		return NULL;
290 
291 	/* Reserve space for headers. */
292 	skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
293 
294 	skb->dst = dst_clone(dst);
295 	skb->csum = 0;
296 
297 	dreq = dccp_rsk(req);
298 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
299 	DCCP_SKB_CB(skb)->dccpd_seq  = dreq->dreq_iss;
300 	dccp_insert_options(sk, skb);
301 
302 	skb->h.raw = skb_push(skb, dccp_header_size);
303 
304 	dh = dccp_hdr(skb);
305 	memset(dh, 0, dccp_header_size);
306 
307 	dh->dccph_sport	= inet_sk(sk)->sport;
308 	dh->dccph_dport	= inet_rsk(req)->rmt_port;
309 	dh->dccph_doff	= (dccp_header_size +
310 			   DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
311 	dh->dccph_type	= DCCP_PKT_RESPONSE;
312 	dh->dccph_x	= 1;
313 	dccp_hdr_set_seq(dh, dreq->dreq_iss);
314 	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr);
315 	dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service;
316 
317 	dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr,
318 					      inet_rsk(req)->rmt_addr);
319 
320 	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
321 	return skb;
322 }
323 
324 struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
325 				const enum dccp_reset_codes code)
326 
327 {
328 	struct dccp_hdr *dh;
329 	struct dccp_sock *dp = dccp_sk(sk);
330 	const int dccp_header_size = sizeof(struct dccp_hdr) +
331 				     sizeof(struct dccp_hdr_ext) +
332 				     sizeof(struct dccp_hdr_reset);
333 	struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
334 					       dccp_header_size, 1,
335 					   GFP_ATOMIC);
336 	if (skb == NULL)
337 		return NULL;
338 
339 	/* Reserve space for headers. */
340 	skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
341 
342 	skb->dst = dst_clone(dst);
343 	skb->csum = 0;
344 
345 	dccp_inc_seqno(&dp->dccps_gss);
346 
347 	DCCP_SKB_CB(skb)->dccpd_reset_code = code;
348 	DCCP_SKB_CB(skb)->dccpd_type	   = DCCP_PKT_RESET;
349 	DCCP_SKB_CB(skb)->dccpd_seq	   = dp->dccps_gss;
350 	dccp_insert_options(sk, skb);
351 
352 	skb->h.raw = skb_push(skb, dccp_header_size);
353 
354 	dh = dccp_hdr(skb);
355 	memset(dh, 0, dccp_header_size);
356 
357 	dh->dccph_sport	= inet_sk(sk)->sport;
358 	dh->dccph_dport	= inet_sk(sk)->dport;
359 	dh->dccph_doff	= (dccp_header_size +
360 			   DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
361 	dh->dccph_type	= DCCP_PKT_RESET;
362 	dh->dccph_x	= 1;
363 	dccp_hdr_set_seq(dh, dp->dccps_gss);
364 	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);
365 
366 	dccp_hdr_reset(skb)->dccph_reset_code = code;
367 
368 	dh->dccph_checksum = dccp_v4_checksum(skb, inet_sk(sk)->saddr,
369 					      inet_sk(sk)->daddr);
370 
371 	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
372 	return skb;
373 }
374 
375 /*
376  * Do all connect socket setups that can be done AF independent.
377  */
378 static inline void dccp_connect_init(struct sock *sk)
379 {
380 	struct dst_entry *dst = __sk_dst_get(sk);
381 	struct inet_connection_sock *icsk = inet_csk(sk);
382 
383 	sk->sk_err = 0;
384 	sock_reset_flag(sk, SOCK_DONE);
385 
386 	dccp_sync_mss(sk, dst_mtu(dst));
387 
388 	/*
389 	 * FIXME: set dp->{dccps_swh,dccps_swl}, with
390 	 * something like dccp_inc_seq
391 	 */
392 
393 	icsk->icsk_retransmits = 0;
394 }
395 
396 int dccp_connect(struct sock *sk)
397 {
398 	struct sk_buff *skb;
399 	struct inet_connection_sock *icsk = inet_csk(sk);
400 
401 	dccp_connect_init(sk);
402 
403 	skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation);
404 	if (unlikely(skb == NULL))
405 		return -ENOBUFS;
406 
407 	/* Reserve space for headers. */
408 	skb_reserve(skb, MAX_DCCP_HEADER);
409 
410 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
411 	skb->csum = 0;
412 
413 	dccp_skb_entail(sk, skb);
414 	dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
415 	DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
416 
417 	/* Timer for repeating the REQUEST until an answer. */
418 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
419 				  icsk->icsk_rto, DCCP_RTO_MAX);
420 	return 0;
421 }
422 
423 void dccp_send_ack(struct sock *sk)
424 {
425 	/* If we have been reset, we may not send again. */
426 	if (sk->sk_state != DCCP_CLOSED) {
427 		struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
428 
429 		if (skb == NULL) {
430 			inet_csk_schedule_ack(sk);
431 			inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
432 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
433 						  TCP_DELACK_MAX,
434 						  DCCP_RTO_MAX);
435 			return;
436 		}
437 
438 		/* Reserve space for headers */
439 		skb_reserve(skb, MAX_DCCP_HEADER);
440 		skb->csum = 0;
441 		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
442 		dccp_transmit_skb(sk, skb);
443 	}
444 }
445 
446 EXPORT_SYMBOL_GPL(dccp_send_ack);
447 
448 void dccp_send_delayed_ack(struct sock *sk)
449 {
450 	struct inet_connection_sock *icsk = inet_csk(sk);
451 	/*
452 	 * FIXME: tune this timer. elapsed time fixes the skew, so no problem
453 	 * with using 2s, and active senders also piggyback the ACK into a
454 	 * DATAACK packet, so this is really for quiescent senders.
455 	 */
456 	unsigned long timeout = jiffies + 2 * HZ;
457 
458 	/* Use new timeout only if there wasn't a older one earlier. */
459 	if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
460 		/* If delack timer was blocked or is about to expire,
461 		 * send ACK now.
462 		 *
463 		 * FIXME: check the "about to expire" part
464 		 */
465 		if (icsk->icsk_ack.blocked) {
466 			dccp_send_ack(sk);
467 			return;
468 		}
469 
470 		if (!time_before(timeout, icsk->icsk_ack.timeout))
471 			timeout = icsk->icsk_ack.timeout;
472 	}
473 	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
474 	icsk->icsk_ack.timeout = timeout;
475 	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
476 }
477 
478 void dccp_send_sync(struct sock *sk, const u64 seq,
479 		    const enum dccp_pkt_type pkt_type)
480 {
481 	/*
482 	 * We are not putting this on the write queue, so
483 	 * dccp_transmit_skb() will set the ownership to this
484 	 * sock.
485 	 */
486 	struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
487 
488 	if (skb == NULL)
489 		/* FIXME: how to make sure the sync is sent? */
490 		return;
491 
492 	/* Reserve space for headers and prepare control bits. */
493 	skb_reserve(skb, MAX_DCCP_HEADER);
494 	skb->csum = 0;
495 	DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
496 	DCCP_SKB_CB(skb)->dccpd_seq = seq;
497 
498 	dccp_transmit_skb(sk, skb);
499 }
500 
501 /*
502  * Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This
503  * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under
504  * any circumstances.
505  */
506 void dccp_send_close(struct sock *sk, const int active)
507 {
508 	struct dccp_sock *dp = dccp_sk(sk);
509 	struct sk_buff *skb;
510 	const gfp_t prio = active ? GFP_KERNEL : GFP_ATOMIC;
511 
512 	skb = alloc_skb(sk->sk_prot->max_header, prio);
513 	if (skb == NULL)
514 		return;
515 
516 	/* Reserve space for headers and prepare control bits. */
517 	skb_reserve(skb, sk->sk_prot->max_header);
518 	skb->csum = 0;
519 	DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ?
520 					DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
521 
522 	if (active) {
523 		dccp_skb_entail(sk, skb);
524 		dccp_transmit_skb(sk, skb_clone(skb, prio));
525 	} else
526 		dccp_transmit_skb(sk, skb);
527 }
528