1 /* 2 * net/dccp/output.c 3 * 4 * An implementation of the DCCP protocol 5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 */ 12 13 #include <linux/dccp.h> 14 #include <linux/kernel.h> 15 #include <linux/skbuff.h> 16 17 #include <net/inet_sock.h> 18 #include <net/sock.h> 19 20 #include "ackvec.h" 21 #include "ccid.h" 22 #include "dccp.h" 23 24 static inline void dccp_event_ack_sent(struct sock *sk) 25 { 26 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); 27 } 28 29 static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb) 30 { 31 skb_set_owner_w(skb, sk); 32 WARN_ON(sk->sk_send_head); 33 sk->sk_send_head = skb; 34 } 35 36 /* 37 * All SKB's seen here are completely headerless. It is our 38 * job to build the DCCP header, and pass the packet down to 39 * IP so it can do the same plus pass the packet off to the 40 * device. 41 */ 42 static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) 43 { 44 if (likely(skb != NULL)) { 45 const struct inet_sock *inet = inet_sk(sk); 46 const struct inet_connection_sock *icsk = inet_csk(sk); 47 struct dccp_sock *dp = dccp_sk(sk); 48 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); 49 struct dccp_hdr *dh; 50 /* XXX For now we're using only 48 bits sequence numbers */ 51 const u32 dccp_header_size = sizeof(*dh) + 52 sizeof(struct dccp_hdr_ext) + 53 dccp_packet_hdr_len(dcb->dccpd_type); 54 int err, set_ack = 1; 55 u64 ackno = dp->dccps_gsr; 56 57 dccp_inc_seqno(&dp->dccps_gss); 58 59 switch (dcb->dccpd_type) { 60 case DCCP_PKT_DATA: 61 set_ack = 0; 62 /* fall through */ 63 case DCCP_PKT_DATAACK: 64 case DCCP_PKT_RESET: 65 break; 66 67 case DCCP_PKT_REQUEST: 68 set_ack = 0; 69 /* fall through */ 70 71 case DCCP_PKT_SYNC: 72 case DCCP_PKT_SYNCACK: 73 ackno = dcb->dccpd_ack_seq; 74 /* fall through */ 75 default: 76 /* 77 * Set owner/destructor: some skbs are allocated via 78 * alloc_skb (e.g. when retransmission may happen). 79 * Only Data, DataAck, and Reset packets should come 80 * through here with skb->sk set. 81 */ 82 WARN_ON(skb->sk); 83 skb_set_owner_w(skb, sk); 84 break; 85 } 86 87 dcb->dccpd_seq = dp->dccps_gss; 88 89 if (dccp_insert_options(sk, skb)) { 90 kfree_skb(skb); 91 return -EPROTO; 92 } 93 94 95 /* Build DCCP header and checksum it. */ 96 dh = dccp_zeroed_hdr(skb, dccp_header_size); 97 dh->dccph_type = dcb->dccpd_type; 98 dh->dccph_sport = inet->sport; 99 dh->dccph_dport = inet->dport; 100 dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4; 101 dh->dccph_ccval = dcb->dccpd_ccval; 102 dh->dccph_cscov = dp->dccps_pcslen; 103 /* XXX For now we're using only 48 bits sequence numbers */ 104 dh->dccph_x = 1; 105 106 dp->dccps_awh = dp->dccps_gss; 107 dccp_hdr_set_seq(dh, dp->dccps_gss); 108 if (set_ack) 109 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno); 110 111 switch (dcb->dccpd_type) { 112 case DCCP_PKT_REQUEST: 113 dccp_hdr_request(skb)->dccph_req_service = 114 dp->dccps_service; 115 break; 116 case DCCP_PKT_RESET: 117 dccp_hdr_reset(skb)->dccph_reset_code = 118 dcb->dccpd_reset_code; 119 break; 120 } 121 122 icsk->icsk_af_ops->send_check(sk, 0, skb); 123 124 if (set_ack) 125 dccp_event_ack_sent(sk); 126 127 DCCP_INC_STATS(DCCP_MIB_OUTSEGS); 128 129 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 130 err = icsk->icsk_af_ops->queue_xmit(skb, 0); 131 return net_xmit_eval(err); 132 } 133 return -ENOBUFS; 134 } 135 136 unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) 137 { 138 struct inet_connection_sock *icsk = inet_csk(sk); 139 struct dccp_sock *dp = dccp_sk(sk); 140 int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len - 141 sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext)); 142 143 /* Now subtract optional transport overhead */ 144 mss_now -= icsk->icsk_ext_hdr_len; 145 146 /* 147 * FIXME: this should come from the CCID infrastructure, where, say, 148 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets 149 * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED 150 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to 151 * make it a multiple of 4 152 */ 153 154 mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; 155 156 /* And store cached results */ 157 icsk->icsk_pmtu_cookie = pmtu; 158 dp->dccps_mss_cache = mss_now; 159 160 return mss_now; 161 } 162 163 EXPORT_SYMBOL_GPL(dccp_sync_mss); 164 165 void dccp_write_space(struct sock *sk) 166 { 167 read_lock(&sk->sk_callback_lock); 168 169 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 170 wake_up_interruptible(sk->sk_sleep); 171 /* Should agree with poll, otherwise some programs break */ 172 if (sock_writeable(sk)) 173 sk_wake_async(sk, 2, POLL_OUT); 174 175 read_unlock(&sk->sk_callback_lock); 176 } 177 178 /** 179 * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet 180 * @sk: socket to wait for 181 * @skb: current skb to pass on for waiting 182 * @delay: sleep timeout in milliseconds (> 0) 183 * This function is called by default when the socket is closed, and 184 * when a non-zero linger time is set on the socket. For consistency 185 */ 186 static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) 187 { 188 struct dccp_sock *dp = dccp_sk(sk); 189 DEFINE_WAIT(wait); 190 unsigned long jiffdelay; 191 int rc; 192 193 do { 194 dccp_pr_debug("delayed send by %d msec\n", delay); 195 jiffdelay = msecs_to_jiffies(delay); 196 197 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 198 199 sk->sk_write_pending++; 200 release_sock(sk); 201 schedule_timeout(jiffdelay); 202 lock_sock(sk); 203 sk->sk_write_pending--; 204 205 if (sk->sk_err) 206 goto do_error; 207 if (signal_pending(current)) 208 goto do_interrupted; 209 210 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 211 } while ((delay = rc) > 0); 212 out: 213 finish_wait(sk->sk_sleep, &wait); 214 return rc; 215 216 do_error: 217 rc = -EPIPE; 218 goto out; 219 do_interrupted: 220 rc = -EINTR; 221 goto out; 222 } 223 224 void dccp_write_xmit(struct sock *sk, int block) 225 { 226 struct dccp_sock *dp = dccp_sk(sk); 227 struct sk_buff *skb; 228 229 while ((skb = skb_peek(&sk->sk_write_queue))) { 230 int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 231 232 if (err > 0) { 233 if (!block) { 234 sk_reset_timer(sk, &dp->dccps_xmit_timer, 235 msecs_to_jiffies(err)+jiffies); 236 break; 237 } else 238 err = dccp_wait_for_ccid(sk, skb, err); 239 if (err && err != -EINTR) 240 DCCP_BUG("err=%d after dccp_wait_for_ccid", err); 241 } 242 243 skb_dequeue(&sk->sk_write_queue); 244 if (err == 0) { 245 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); 246 const int len = skb->len; 247 248 if (sk->sk_state == DCCP_PARTOPEN) { 249 /* See 8.1.5. Handshake Completion */ 250 inet_csk_schedule_ack(sk); 251 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 252 inet_csk(sk)->icsk_rto, 253 DCCP_RTO_MAX); 254 dcb->dccpd_type = DCCP_PKT_DATAACK; 255 } else if (dccp_ack_pending(sk)) 256 dcb->dccpd_type = DCCP_PKT_DATAACK; 257 else 258 dcb->dccpd_type = DCCP_PKT_DATA; 259 260 err = dccp_transmit_skb(sk, skb); 261 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); 262 if (err) 263 DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", 264 err); 265 } else { 266 dccp_pr_debug("packet discarded due to err=%d\n", err); 267 kfree_skb(skb); 268 } 269 } 270 } 271 272 int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) 273 { 274 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk) != 0) 275 return -EHOSTUNREACH; /* Routing failure or similar. */ 276 277 return dccp_transmit_skb(sk, (skb_cloned(skb) ? 278 pskb_copy(skb, GFP_ATOMIC): 279 skb_clone(skb, GFP_ATOMIC))); 280 } 281 282 struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, 283 struct request_sock *req) 284 { 285 struct dccp_hdr *dh; 286 struct dccp_request_sock *dreq; 287 const u32 dccp_header_size = sizeof(struct dccp_hdr) + 288 sizeof(struct dccp_hdr_ext) + 289 sizeof(struct dccp_hdr_response); 290 struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1, 291 GFP_ATOMIC); 292 if (skb == NULL) 293 return NULL; 294 295 /* Reserve space for headers. */ 296 skb_reserve(skb, sk->sk_prot->max_header); 297 298 skb->dst = dst_clone(dst); 299 300 dreq = dccp_rsk(req); 301 if (inet_rsk(req)->acked) /* increase ISS upon retransmission */ 302 dccp_inc_seqno(&dreq->dreq_iss); 303 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; 304 DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; 305 306 if (dccp_insert_options(sk, skb)) { 307 kfree_skb(skb); 308 return NULL; 309 } 310 311 /* Build and checksum header */ 312 dh = dccp_zeroed_hdr(skb, dccp_header_size); 313 314 dh->dccph_sport = inet_sk(sk)->sport; 315 dh->dccph_dport = inet_rsk(req)->rmt_port; 316 dh->dccph_doff = (dccp_header_size + 317 DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; 318 dh->dccph_type = DCCP_PKT_RESPONSE; 319 dh->dccph_x = 1; 320 dccp_hdr_set_seq(dh, dreq->dreq_iss); 321 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr); 322 dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service; 323 324 dccp_csum_outgoing(skb); 325 326 /* We use `acked' to remember that a Response was already sent. */ 327 inet_rsk(req)->acked = 1; 328 DCCP_INC_STATS(DCCP_MIB_OUTSEGS); 329 return skb; 330 } 331 332 EXPORT_SYMBOL_GPL(dccp_make_response); 333 334 /* answer offending packet in @rcv_skb with Reset from control socket @ctl */ 335 struct sk_buff *dccp_ctl_make_reset(struct socket *ctl, struct sk_buff *rcv_skb) 336 { 337 struct dccp_hdr *rxdh = dccp_hdr(rcv_skb), *dh; 338 struct dccp_skb_cb *dcb = DCCP_SKB_CB(rcv_skb); 339 const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) + 340 sizeof(struct dccp_hdr_ext) + 341 sizeof(struct dccp_hdr_reset); 342 struct dccp_hdr_reset *dhr; 343 struct sk_buff *skb; 344 345 skb = alloc_skb(ctl->sk->sk_prot->max_header, GFP_ATOMIC); 346 if (skb == NULL) 347 return NULL; 348 349 skb_reserve(skb, ctl->sk->sk_prot->max_header); 350 351 /* Swap the send and the receive. */ 352 dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len); 353 dh->dccph_type = DCCP_PKT_RESET; 354 dh->dccph_sport = rxdh->dccph_dport; 355 dh->dccph_dport = rxdh->dccph_sport; 356 dh->dccph_doff = dccp_hdr_reset_len / 4; 357 dh->dccph_x = 1; 358 359 dhr = dccp_hdr_reset(skb); 360 dhr->dccph_reset_code = dcb->dccpd_reset_code; 361 362 switch (dcb->dccpd_reset_code) { 363 case DCCP_RESET_CODE_PACKET_ERROR: 364 dhr->dccph_reset_data[0] = rxdh->dccph_type; 365 break; 366 case DCCP_RESET_CODE_OPTION_ERROR: /* fall through */ 367 case DCCP_RESET_CODE_MANDATORY_ERROR: 368 memcpy(dhr->dccph_reset_data, dcb->dccpd_reset_data, 3); 369 break; 370 } 371 /* 372 * From RFC 4340, 8.3.1: 373 * If P.ackno exists, set R.seqno := P.ackno + 1. 374 * Else set R.seqno := 0. 375 */ 376 if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) 377 dccp_hdr_set_seq(dh, ADD48(dcb->dccpd_ack_seq, 1)); 378 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dcb->dccpd_seq); 379 380 dccp_csum_outgoing(skb); 381 return skb; 382 } 383 384 EXPORT_SYMBOL_GPL(dccp_ctl_make_reset); 385 386 /* send Reset on established socket, to close or abort the connection */ 387 int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) 388 { 389 struct sk_buff *skb; 390 /* 391 * FIXME: what if rebuild_header fails? 392 * Should we be doing a rebuild_header here? 393 */ 394 int err = inet_sk_rebuild_header(sk); 395 396 if (err != 0) 397 return err; 398 399 skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1, GFP_ATOMIC); 400 if (skb == NULL) 401 return -ENOBUFS; 402 403 /* Reserve space for headers and prepare control bits. */ 404 skb_reserve(skb, sk->sk_prot->max_header); 405 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET; 406 DCCP_SKB_CB(skb)->dccpd_reset_code = code; 407 408 return dccp_transmit_skb(sk, skb); 409 } 410 411 /* 412 * Do all connect socket setups that can be done AF independent. 413 */ 414 static inline void dccp_connect_init(struct sock *sk) 415 { 416 struct dccp_sock *dp = dccp_sk(sk); 417 struct dst_entry *dst = __sk_dst_get(sk); 418 struct inet_connection_sock *icsk = inet_csk(sk); 419 420 sk->sk_err = 0; 421 sock_reset_flag(sk, SOCK_DONE); 422 423 dccp_sync_mss(sk, dst_mtu(dst)); 424 425 /* 426 * SWL and AWL are initially adjusted so that they are not less than 427 * the initial Sequence Numbers received and sent, respectively: 428 * SWL := max(GSR + 1 - floor(W/4), ISR), 429 * AWL := max(GSS - W' + 1, ISS). 430 * These adjustments MUST be applied only at the beginning of the 431 * connection. 432 */ 433 dccp_update_gss(sk, dp->dccps_iss); 434 dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); 435 436 /* S.GAR - greatest valid acknowledgement number received on a non-Sync; 437 * initialized to S.ISS (sec. 8.5) */ 438 dp->dccps_gar = dp->dccps_iss; 439 440 icsk->icsk_retransmits = 0; 441 } 442 443 int dccp_connect(struct sock *sk) 444 { 445 struct sk_buff *skb; 446 struct inet_connection_sock *icsk = inet_csk(sk); 447 448 dccp_connect_init(sk); 449 450 skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation); 451 if (unlikely(skb == NULL)) 452 return -ENOBUFS; 453 454 /* Reserve space for headers. */ 455 skb_reserve(skb, sk->sk_prot->max_header); 456 457 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; 458 459 dccp_skb_entail(sk, skb); 460 dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); 461 DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); 462 463 /* Timer for repeating the REQUEST until an answer. */ 464 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 465 icsk->icsk_rto, DCCP_RTO_MAX); 466 return 0; 467 } 468 469 EXPORT_SYMBOL_GPL(dccp_connect); 470 471 void dccp_send_ack(struct sock *sk) 472 { 473 /* If we have been reset, we may not send again. */ 474 if (sk->sk_state != DCCP_CLOSED) { 475 struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header, 476 GFP_ATOMIC); 477 478 if (skb == NULL) { 479 inet_csk_schedule_ack(sk); 480 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; 481 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 482 TCP_DELACK_MAX, 483 DCCP_RTO_MAX); 484 return; 485 } 486 487 /* Reserve space for headers */ 488 skb_reserve(skb, sk->sk_prot->max_header); 489 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK; 490 dccp_transmit_skb(sk, skb); 491 } 492 } 493 494 EXPORT_SYMBOL_GPL(dccp_send_ack); 495 496 /* FIXME: Is this still necessary (11.3) - currently nowhere used by DCCP. */ 497 void dccp_send_delayed_ack(struct sock *sk) 498 { 499 struct inet_connection_sock *icsk = inet_csk(sk); 500 /* 501 * FIXME: tune this timer. elapsed time fixes the skew, so no problem 502 * with using 2s, and active senders also piggyback the ACK into a 503 * DATAACK packet, so this is really for quiescent senders. 504 */ 505 unsigned long timeout = jiffies + 2 * HZ; 506 507 /* Use new timeout only if there wasn't a older one earlier. */ 508 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) { 509 /* If delack timer was blocked or is about to expire, 510 * send ACK now. 511 * 512 * FIXME: check the "about to expire" part 513 */ 514 if (icsk->icsk_ack.blocked) { 515 dccp_send_ack(sk); 516 return; 517 } 518 519 if (!time_before(timeout, icsk->icsk_ack.timeout)) 520 timeout = icsk->icsk_ack.timeout; 521 } 522 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; 523 icsk->icsk_ack.timeout = timeout; 524 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); 525 } 526 527 void dccp_send_sync(struct sock *sk, const u64 ackno, 528 const enum dccp_pkt_type pkt_type) 529 { 530 /* 531 * We are not putting this on the write queue, so 532 * dccp_transmit_skb() will set the ownership to this 533 * sock. 534 */ 535 struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header, GFP_ATOMIC); 536 537 if (skb == NULL) { 538 /* FIXME: how to make sure the sync is sent? */ 539 DCCP_CRIT("could not send %s", dccp_packet_name(pkt_type)); 540 return; 541 } 542 543 /* Reserve space for headers and prepare control bits. */ 544 skb_reserve(skb, sk->sk_prot->max_header); 545 DCCP_SKB_CB(skb)->dccpd_type = pkt_type; 546 DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; 547 548 dccp_transmit_skb(sk, skb); 549 } 550 551 EXPORT_SYMBOL_GPL(dccp_send_sync); 552 553 /* 554 * Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This 555 * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under 556 * any circumstances. 557 */ 558 void dccp_send_close(struct sock *sk, const int active) 559 { 560 struct dccp_sock *dp = dccp_sk(sk); 561 struct sk_buff *skb; 562 const gfp_t prio = active ? GFP_KERNEL : GFP_ATOMIC; 563 564 skb = alloc_skb(sk->sk_prot->max_header, prio); 565 if (skb == NULL) 566 return; 567 568 /* Reserve space for headers and prepare control bits. */ 569 skb_reserve(skb, sk->sk_prot->max_header); 570 DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? 571 DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; 572 573 if (active) { 574 dccp_write_xmit(sk, 1); 575 dccp_skb_entail(sk, skb); 576 dccp_transmit_skb(sk, skb_clone(skb, prio)); 577 /* FIXME do we need a retransmit timer here? */ 578 } else 579 dccp_transmit_skb(sk, skb); 580 } 581