1 /* RxRPC recvmsg() implementation 2 * 3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 13 14 #include <linux/net.h> 15 #include <linux/skbuff.h> 16 #include <linux/export.h> 17 #include <net/sock.h> 18 #include <net/af_rxrpc.h> 19 #include "ar-internal.h" 20 21 /* 22 * Post a call for attention by the socket or kernel service. Further 23 * notifications are suppressed by putting recvmsg_link on a dummy queue. 24 */ 25 void rxrpc_notify_socket(struct rxrpc_call *call) 26 { 27 struct rxrpc_sock *rx; 28 struct sock *sk; 29 30 _enter("%d", call->debug_id); 31 32 if (!list_empty(&call->recvmsg_link)) 33 return; 34 35 rcu_read_lock(); 36 37 rx = rcu_dereference(call->socket); 38 sk = &rx->sk; 39 if (rx && sk->sk_state < RXRPC_CLOSE) { 40 if (call->notify_rx) { 41 call->notify_rx(sk, call, call->user_call_ID); 42 } else { 43 write_lock_bh(&rx->recvmsg_lock); 44 if (list_empty(&call->recvmsg_link)) { 45 rxrpc_get_call(call, rxrpc_call_got); 46 list_add_tail(&call->recvmsg_link, &rx->recvmsg_q); 47 } 48 write_unlock_bh(&rx->recvmsg_lock); 49 50 if (!sock_flag(sk, SOCK_DEAD)) { 51 _debug("call %ps", sk->sk_data_ready); 52 sk->sk_data_ready(sk); 53 } 54 } 55 } 56 57 rcu_read_unlock(); 58 _leave(""); 59 } 60 61 /* 62 * Pass a call terminating message to userspace. 63 */ 64 static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) 65 { 66 u32 tmp = 0; 67 int ret; 68 69 switch (call->completion) { 70 case RXRPC_CALL_SUCCEEDED: 71 ret = 0; 72 if (rxrpc_is_service_call(call)) 73 ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &tmp); 74 break; 75 case RXRPC_CALL_REMOTELY_ABORTED: 76 tmp = call->abort_code; 77 ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); 78 break; 79 case RXRPC_CALL_LOCALLY_ABORTED: 80 tmp = call->abort_code; 81 ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); 82 break; 83 case RXRPC_CALL_NETWORK_ERROR: 84 tmp = call->error; 85 ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp); 86 break; 87 case RXRPC_CALL_LOCAL_ERROR: 88 tmp = call->error; 89 ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp); 90 break; 91 default: 92 pr_err("Invalid terminal call state %u\n", call->state); 93 BUG(); 94 break; 95 } 96 97 trace_rxrpc_recvmsg(call, rxrpc_recvmsg_terminal, call->rx_hard_ack, 98 call->rx_pkt_offset, call->rx_pkt_len, ret); 99 return ret; 100 } 101 102 /* 103 * Pass back notification of a new call. The call is added to the 104 * to-be-accepted list. This means that the next call to be accepted might not 105 * be the last call seen awaiting acceptance, but unless we leave this on the 106 * front of the queue and block all other messages until someone gives us a 107 * user_ID for it, there's not a lot we can do. 108 */ 109 static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, 110 struct rxrpc_call *call, 111 struct msghdr *msg, int flags) 112 { 113 int tmp = 0, ret; 114 115 ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &tmp); 116 117 if (ret == 0 && !(flags & MSG_PEEK)) { 118 _debug("to be accepted"); 119 write_lock_bh(&rx->recvmsg_lock); 120 list_del_init(&call->recvmsg_link); 121 write_unlock_bh(&rx->recvmsg_lock); 122 123 rxrpc_get_call(call, rxrpc_call_got); 124 write_lock(&rx->call_lock); 125 list_add_tail(&call->accept_link, &rx->to_be_accepted); 126 write_unlock(&rx->call_lock); 127 } 128 129 trace_rxrpc_recvmsg(call, rxrpc_recvmsg_to_be_accepted, 1, 0, 0, ret); 130 return ret; 131 } 132 133 /* 134 * End the packet reception phase. 135 */ 136 static void rxrpc_end_rx_phase(struct rxrpc_call *call) 137 { 138 _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); 139 140 trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top); 141 ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); 142 143 if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { 144 rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false); 145 rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); 146 } else { 147 rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, false); 148 } 149 150 write_lock_bh(&call->state_lock); 151 152 switch (call->state) { 153 case RXRPC_CALL_CLIENT_RECV_REPLY: 154 __rxrpc_call_completed(call); 155 break; 156 157 case RXRPC_CALL_SERVER_RECV_REQUEST: 158 call->state = RXRPC_CALL_SERVER_ACK_REQUEST; 159 break; 160 default: 161 break; 162 } 163 164 write_unlock_bh(&call->state_lock); 165 } 166 167 /* 168 * Discard a packet we've used up and advance the Rx window by one. 169 */ 170 static void rxrpc_rotate_rx_window(struct rxrpc_call *call) 171 { 172 struct rxrpc_skb_priv *sp; 173 struct sk_buff *skb; 174 rxrpc_serial_t serial; 175 rxrpc_seq_t hard_ack, top; 176 u8 flags; 177 int ix; 178 179 _enter("%d", call->debug_id); 180 181 hard_ack = call->rx_hard_ack; 182 top = smp_load_acquire(&call->rx_top); 183 ASSERT(before(hard_ack, top)); 184 185 hard_ack++; 186 ix = hard_ack & RXRPC_RXTX_BUFF_MASK; 187 skb = call->rxtx_buffer[ix]; 188 rxrpc_see_skb(skb); 189 sp = rxrpc_skb(skb); 190 flags = sp->hdr.flags; 191 serial = sp->hdr.serial; 192 if (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) 193 serial += (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) - 1; 194 195 call->rxtx_buffer[ix] = NULL; 196 call->rxtx_annotations[ix] = 0; 197 /* Barrier against rxrpc_input_data(). */ 198 smp_store_release(&call->rx_hard_ack, hard_ack); 199 200 rxrpc_free_skb(skb); 201 202 _debug("%u,%u,%02x", hard_ack, top, flags); 203 trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); 204 if (flags & RXRPC_LAST_PACKET) 205 rxrpc_end_rx_phase(call); 206 } 207 208 /* 209 * Decrypt and verify a (sub)packet. The packet's length may be changed due to 210 * padding, but if this is the case, the packet length will be resident in the 211 * socket buffer. Note that we can't modify the master skb info as the skb may 212 * be the home to multiple subpackets. 213 */ 214 static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, 215 u8 annotation, 216 unsigned int offset, unsigned int len) 217 { 218 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 219 rxrpc_seq_t seq = sp->hdr.seq; 220 u16 cksum = sp->hdr.cksum; 221 222 _enter(""); 223 224 /* For all but the head jumbo subpacket, the security checksum is in a 225 * jumbo header immediately prior to the data. 226 */ 227 if ((annotation & RXRPC_RX_ANNO_JUMBO) > 1) { 228 __be16 tmp; 229 if (skb_copy_bits(skb, offset - 2, &tmp, 2) < 0) 230 BUG(); 231 cksum = ntohs(tmp); 232 seq += (annotation & RXRPC_RX_ANNO_JUMBO) - 1; 233 } 234 235 return call->conn->security->verify_packet(call, skb, offset, len, 236 seq, cksum); 237 } 238 239 /* 240 * Locate the data within a packet. This is complicated by: 241 * 242 * (1) An skb may contain a jumbo packet - so we have to find the appropriate 243 * subpacket. 244 * 245 * (2) The (sub)packets may be encrypted and, if so, the encrypted portion 246 * contains an extra header which includes the true length of the data, 247 * excluding any encrypted padding. 248 */ 249 static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, 250 u8 *_annotation, 251 unsigned int *_offset, unsigned int *_len) 252 { 253 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 254 unsigned int offset = *_offset; 255 unsigned int len = *_len; 256 int ret; 257 u8 annotation = *_annotation; 258 259 /* Locate the subpacket */ 260 offset = sp->offset; 261 len = skb->len - sp->offset; 262 if ((annotation & RXRPC_RX_ANNO_JUMBO) > 0) { 263 offset += (((annotation & RXRPC_RX_ANNO_JUMBO) - 1) * 264 RXRPC_JUMBO_SUBPKTLEN); 265 len = (annotation & RXRPC_RX_ANNO_JLAST) ? 266 skb->len - offset : RXRPC_JUMBO_SUBPKTLEN; 267 } 268 269 if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) { 270 ret = rxrpc_verify_packet(call, skb, annotation, offset, len); 271 if (ret < 0) 272 return ret; 273 *_annotation |= RXRPC_RX_ANNO_VERIFIED; 274 } 275 276 *_offset = offset; 277 *_len = len; 278 call->conn->security->locate_data(call, skb, _offset, _len); 279 return 0; 280 } 281 282 /* 283 * Deliver messages to a call. This keeps processing packets until the buffer 284 * is filled and we find either more DATA (returns 0) or the end of the DATA 285 * (returns 1). If more packets are required, it returns -EAGAIN. 286 */ 287 static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, 288 struct msghdr *msg, struct iov_iter *iter, 289 size_t len, int flags, size_t *_offset) 290 { 291 struct rxrpc_skb_priv *sp; 292 struct sk_buff *skb; 293 rxrpc_seq_t hard_ack, top, seq; 294 size_t remain; 295 bool last; 296 unsigned int rx_pkt_offset, rx_pkt_len; 297 int ix, copy, ret = -EAGAIN, ret2; 298 299 _enter(""); 300 301 rx_pkt_offset = call->rx_pkt_offset; 302 rx_pkt_len = call->rx_pkt_len; 303 304 if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) { 305 seq = call->rx_hard_ack; 306 ret = 1; 307 goto done; 308 } 309 310 /* Barriers against rxrpc_input_data(). */ 311 hard_ack = call->rx_hard_ack; 312 top = smp_load_acquire(&call->rx_top); 313 for (seq = hard_ack + 1; before_eq(seq, top); seq++) { 314 ix = seq & RXRPC_RXTX_BUFF_MASK; 315 skb = call->rxtx_buffer[ix]; 316 if (!skb) { 317 trace_rxrpc_recvmsg(call, rxrpc_recvmsg_hole, seq, 318 rx_pkt_offset, rx_pkt_len, 0); 319 break; 320 } 321 smp_rmb(); 322 rxrpc_see_skb(skb); 323 sp = rxrpc_skb(skb); 324 325 if (!(flags & MSG_PEEK)) 326 trace_rxrpc_receive(call, rxrpc_receive_front, 327 sp->hdr.serial, seq); 328 329 if (msg) 330 sock_recv_timestamp(msg, sock->sk, skb); 331 332 if (rx_pkt_offset == 0) { 333 ret2 = rxrpc_locate_data(call, skb, 334 &call->rxtx_annotations[ix], 335 &rx_pkt_offset, &rx_pkt_len); 336 trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq, 337 rx_pkt_offset, rx_pkt_len, ret2); 338 if (ret2 < 0) { 339 ret = ret2; 340 goto out; 341 } 342 } else { 343 trace_rxrpc_recvmsg(call, rxrpc_recvmsg_cont, seq, 344 rx_pkt_offset, rx_pkt_len, 0); 345 } 346 _debug("recvmsg %x DATA #%u { %d, %d }", 347 sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); 348 349 /* We have to handle short, empty and used-up DATA packets. */ 350 remain = len - *_offset; 351 copy = rx_pkt_len; 352 if (copy > remain) 353 copy = remain; 354 if (copy > 0) { 355 ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, 356 copy); 357 if (ret2 < 0) { 358 ret = ret2; 359 goto out; 360 } 361 362 /* handle piecemeal consumption of data packets */ 363 _debug("copied %d @%zu", copy, *_offset); 364 365 rx_pkt_offset += copy; 366 rx_pkt_len -= copy; 367 *_offset += copy; 368 } 369 370 if (rx_pkt_len > 0) { 371 trace_rxrpc_recvmsg(call, rxrpc_recvmsg_full, seq, 372 rx_pkt_offset, rx_pkt_len, 0); 373 _debug("buffer full"); 374 ASSERTCMP(*_offset, ==, len); 375 ret = 0; 376 break; 377 } 378 379 /* The whole packet has been transferred. */ 380 last = sp->hdr.flags & RXRPC_LAST_PACKET; 381 if (!(flags & MSG_PEEK)) 382 rxrpc_rotate_rx_window(call); 383 rx_pkt_offset = 0; 384 rx_pkt_len = 0; 385 386 if (last) { 387 ASSERTCMP(seq, ==, READ_ONCE(call->rx_top)); 388 ret = 1; 389 goto out; 390 } 391 } 392 393 out: 394 if (!(flags & MSG_PEEK)) { 395 call->rx_pkt_offset = rx_pkt_offset; 396 call->rx_pkt_len = rx_pkt_len; 397 } 398 done: 399 trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq, 400 rx_pkt_offset, rx_pkt_len, ret); 401 _leave(" = %d [%u/%u]", ret, seq, top); 402 return ret; 403 } 404 405 /* 406 * Receive a message from an RxRPC socket 407 * - we need to be careful about two or more threads calling recvmsg 408 * simultaneously 409 */ 410 int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, 411 int flags) 412 { 413 struct rxrpc_call *call; 414 struct rxrpc_sock *rx = rxrpc_sk(sock->sk); 415 struct list_head *l; 416 size_t copied = 0; 417 long timeo; 418 int ret; 419 420 DEFINE_WAIT(wait); 421 422 trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0, 0, 0, 0); 423 424 if (flags & (MSG_OOB | MSG_TRUNC)) 425 return -EOPNOTSUPP; 426 427 timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT); 428 429 try_again: 430 lock_sock(&rx->sk); 431 432 /* Return immediately if a client socket has no outstanding calls */ 433 if (RB_EMPTY_ROOT(&rx->calls) && 434 list_empty(&rx->recvmsg_q) && 435 rx->sk.sk_state != RXRPC_SERVER_LISTENING) { 436 release_sock(&rx->sk); 437 return -ENODATA; 438 } 439 440 if (list_empty(&rx->recvmsg_q)) { 441 ret = -EWOULDBLOCK; 442 if (timeo == 0) { 443 call = NULL; 444 goto error_no_call; 445 } 446 447 release_sock(&rx->sk); 448 449 /* Wait for something to happen */ 450 prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait, 451 TASK_INTERRUPTIBLE); 452 ret = sock_error(&rx->sk); 453 if (ret) 454 goto wait_error; 455 456 if (list_empty(&rx->recvmsg_q)) { 457 if (signal_pending(current)) 458 goto wait_interrupted; 459 trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait, 460 0, 0, 0, 0); 461 timeo = schedule_timeout(timeo); 462 } 463 finish_wait(sk_sleep(&rx->sk), &wait); 464 goto try_again; 465 } 466 467 /* Find the next call and dequeue it if we're not just peeking. If we 468 * do dequeue it, that comes with a ref that we will need to release. 469 */ 470 write_lock_bh(&rx->recvmsg_lock); 471 l = rx->recvmsg_q.next; 472 call = list_entry(l, struct rxrpc_call, recvmsg_link); 473 if (!(flags & MSG_PEEK)) 474 list_del_init(&call->recvmsg_link); 475 else 476 rxrpc_get_call(call, rxrpc_call_got); 477 write_unlock_bh(&rx->recvmsg_lock); 478 479 trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0); 480 481 if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) 482 BUG(); 483 484 if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { 485 if (flags & MSG_CMSG_COMPAT) { 486 unsigned int id32 = call->user_call_ID; 487 488 ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, 489 sizeof(unsigned int), &id32); 490 } else { 491 ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, 492 sizeof(unsigned long), 493 &call->user_call_ID); 494 } 495 if (ret < 0) 496 goto error; 497 } 498 499 if (msg->msg_name) { 500 size_t len = sizeof(call->conn->params.peer->srx); 501 memcpy(msg->msg_name, &call->conn->params.peer->srx, len); 502 msg->msg_namelen = len; 503 } 504 505 switch (call->state) { 506 case RXRPC_CALL_SERVER_ACCEPTING: 507 ret = rxrpc_recvmsg_new_call(rx, call, msg, flags); 508 break; 509 case RXRPC_CALL_CLIENT_RECV_REPLY: 510 case RXRPC_CALL_SERVER_RECV_REQUEST: 511 case RXRPC_CALL_SERVER_ACK_REQUEST: 512 ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len, 513 flags, &copied); 514 if (ret == -EAGAIN) 515 ret = 0; 516 517 if (after(call->rx_top, call->rx_hard_ack) && 518 call->rxtx_buffer[(call->rx_hard_ack + 1) & RXRPC_RXTX_BUFF_MASK]) 519 rxrpc_notify_socket(call); 520 break; 521 default: 522 ret = 0; 523 break; 524 } 525 526 if (ret < 0) 527 goto error; 528 529 if (call->state == RXRPC_CALL_COMPLETE) { 530 ret = rxrpc_recvmsg_term(call, msg); 531 if (ret < 0) 532 goto error; 533 if (!(flags & MSG_PEEK)) 534 rxrpc_release_call(rx, call); 535 msg->msg_flags |= MSG_EOR; 536 ret = 1; 537 } 538 539 if (ret == 0) 540 msg->msg_flags |= MSG_MORE; 541 else 542 msg->msg_flags &= ~MSG_MORE; 543 ret = copied; 544 545 error: 546 rxrpc_put_call(call, rxrpc_call_put); 547 error_no_call: 548 release_sock(&rx->sk); 549 trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); 550 return ret; 551 552 wait_interrupted: 553 ret = sock_intr_errno(timeo); 554 wait_error: 555 finish_wait(sk_sleep(&rx->sk), &wait); 556 call = NULL; 557 goto error_no_call; 558 } 559 560 /** 561 * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info 562 * @sock: The socket that the call exists on 563 * @call: The call to send data through 564 * @buf: The buffer to receive into 565 * @size: The size of the buffer, including data already read 566 * @_offset: The running offset into the buffer. 567 * @want_more: True if more data is expected to be read 568 * @_abort: Where the abort code is stored if -ECONNABORTED is returned 569 * 570 * Allow a kernel service to receive data and pick up information about the 571 * state of a call. Returns 0 if got what was asked for and there's more 572 * available, 1 if we got what was asked for and we're at the end of the data 573 * and -EAGAIN if we need more data. 574 * 575 * Note that we may return -EAGAIN to drain empty packets at the end of the 576 * data, even if we've already copied over the requested data. 577 * 578 * This function adds the amount it transfers to *_offset, so this should be 579 * precleared as appropriate. Note that the amount remaining in the buffer is 580 * taken to be size - *_offset. 581 * 582 * *_abort should also be initialised to 0. 583 */ 584 int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, 585 void *buf, size_t size, size_t *_offset, 586 bool want_more, u32 *_abort) 587 { 588 struct iov_iter iter; 589 struct kvec iov; 590 int ret; 591 592 _enter("{%d,%s},%zu/%zu,%d", 593 call->debug_id, rxrpc_call_states[call->state], 594 *_offset, size, want_more); 595 596 ASSERTCMP(*_offset, <=, size); 597 ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING); 598 599 iov.iov_base = buf + *_offset; 600 iov.iov_len = size - *_offset; 601 iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset); 602 603 lock_sock(sock->sk); 604 605 switch (call->state) { 606 case RXRPC_CALL_CLIENT_RECV_REPLY: 607 case RXRPC_CALL_SERVER_RECV_REQUEST: 608 case RXRPC_CALL_SERVER_ACK_REQUEST: 609 ret = rxrpc_recvmsg_data(sock, call, NULL, &iter, size, 0, 610 _offset); 611 if (ret < 0) 612 goto out; 613 614 /* We can only reach here with a partially full buffer if we 615 * have reached the end of the data. We must otherwise have a 616 * full buffer or have been given -EAGAIN. 617 */ 618 if (ret == 1) { 619 if (*_offset < size) 620 goto short_data; 621 if (!want_more) 622 goto read_phase_complete; 623 ret = 0; 624 goto out; 625 } 626 627 if (!want_more) 628 goto excess_data; 629 goto out; 630 631 case RXRPC_CALL_COMPLETE: 632 goto call_complete; 633 634 default: 635 ret = -EINPROGRESS; 636 goto out; 637 } 638 639 read_phase_complete: 640 ret = 1; 641 out: 642 release_sock(sock->sk); 643 _leave(" = %d [%zu,%d]", ret, *_offset, *_abort); 644 return ret; 645 646 short_data: 647 ret = -EBADMSG; 648 goto out; 649 excess_data: 650 ret = -EMSGSIZE; 651 goto out; 652 call_complete: 653 *_abort = call->abort_code; 654 ret = call->error; 655 if (call->completion == RXRPC_CALL_SUCCEEDED) { 656 ret = 1; 657 if (size > 0) 658 ret = -ECONNRESET; 659 } 660 goto out; 661 } 662 EXPORT_SYMBOL(rxrpc_kernel_recv_data); 663