1 /* 2 * net/tipc/socket.c: TIPC socket API 3 * 4 * Copyright (c) 2001-2007, 2012-2017, Ericsson AB 5 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the names of the copyright holders nor the names of its 17 * contributors may be used to endorse or promote products derived from 18 * this software without specific prior written permission. 19 * 20 * Alternatively, this software may be distributed under the terms of the 21 * GNU General Public License ("GPL") version 2 as published by the Free 22 * Software Foundation. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 
35 */ 36 37 #include <linux/rhashtable.h> 38 #include <linux/sched/signal.h> 39 40 #include "core.h" 41 #include "name_table.h" 42 #include "node.h" 43 #include "link.h" 44 #include "name_distr.h" 45 #include "socket.h" 46 #include "bcast.h" 47 #include "netlink.h" 48 #include "group.h" 49 #include "trace.h" 50 51 #define NAGLE_START_INIT 4 52 #define NAGLE_START_MAX 1024 53 #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ 54 #define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */ 55 #define TIPC_FWD_MSG 1 56 #define TIPC_MAX_PORT 0xffffffff 57 #define TIPC_MIN_PORT 1 58 #define TIPC_ACK_RATE 4 /* ACK at 1/4 of of rcv window size */ 59 60 enum { 61 TIPC_LISTEN = TCP_LISTEN, 62 TIPC_ESTABLISHED = TCP_ESTABLISHED, 63 TIPC_OPEN = TCP_CLOSE, 64 TIPC_DISCONNECTING = TCP_CLOSE_WAIT, 65 TIPC_CONNECTING = TCP_SYN_SENT, 66 }; 67 68 struct sockaddr_pair { 69 struct sockaddr_tipc sock; 70 struct sockaddr_tipc member; 71 }; 72 73 /** 74 * struct tipc_sock - TIPC socket structure 75 * @sk: socket - interacts with 'port' and with user via the socket API 76 * @conn_type: TIPC type used when connection was established 77 * @conn_instance: TIPC instance used when connection was established 78 * @published: non-zero if port has one or more associated names 79 * @max_pkt: maximum packet size "hint" used when building messages sent by port 80 * @maxnagle: maximum size of msg which can be subject to nagle 81 * @portid: unique port identity in TIPC socket hash table 82 * @phdr: preformatted message header used when sending messages 83 * #cong_links: list of congested links 84 * @publications: list of publications for port 85 * @blocking_link: address of the congested link we are currently sleeping on 86 * @pub_count: total # of publications port has made during its lifetime 87 * @conn_timeout: the time we can wait for an unresponded setup request 88 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue 89 * @cong_link_cnt: number 
of congested links 90 * @snt_unacked: # messages sent by socket, and not yet acked by peer 91 * @rcv_unacked: # messages read by user, but not yet acked back to peer 92 * @peer: 'connected' peer for dgram/rdm 93 * @node: hash table node 94 * @mc_method: cookie for use between socket and broadcast layer 95 * @rcu: rcu struct for tipc_sock 96 */ 97 struct tipc_sock { 98 struct sock sk; 99 u32 conn_type; 100 u32 conn_instance; 101 int published; 102 u32 max_pkt; 103 u32 maxnagle; 104 u32 portid; 105 struct tipc_msg phdr; 106 struct list_head cong_links; 107 struct list_head publications; 108 u32 pub_count; 109 atomic_t dupl_rcvcnt; 110 u16 conn_timeout; 111 bool probe_unacked; 112 u16 cong_link_cnt; 113 u16 snt_unacked; 114 u16 snd_win; 115 u16 peer_caps; 116 u16 rcv_unacked; 117 u16 rcv_win; 118 struct sockaddr_tipc peer; 119 struct rhash_head node; 120 struct tipc_mc_method mc_method; 121 struct rcu_head rcu; 122 struct tipc_group *group; 123 u32 oneway; 124 u32 nagle_start; 125 u16 snd_backlog; 126 u16 msg_acc; 127 u16 pkt_cnt; 128 bool expect_ack; 129 bool nodelay; 130 bool group_is_open; 131 }; 132 133 static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); 134 static void tipc_data_ready(struct sock *sk); 135 static void tipc_write_space(struct sock *sk); 136 static void tipc_sock_destruct(struct sock *sk); 137 static int tipc_release(struct socket *sock); 138 static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, 139 bool kern); 140 static void tipc_sk_timeout(struct timer_list *t); 141 static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, 142 struct tipc_name_seq const *seq); 143 static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, 144 struct tipc_name_seq const *seq); 145 static int tipc_sk_leave(struct tipc_sock *tsk); 146 static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); 147 static int tipc_sk_insert(struct tipc_sock *tsk); 148 static void tipc_sk_remove(struct tipc_sock *tsk); 149 
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz); 150 static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz); 151 static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack); 152 153 static const struct proto_ops packet_ops; 154 static const struct proto_ops stream_ops; 155 static const struct proto_ops msg_ops; 156 static struct proto tipc_proto; 157 static const struct rhashtable_params tsk_rht_params; 158 159 static u32 tsk_own_node(struct tipc_sock *tsk) 160 { 161 return msg_prevnode(&tsk->phdr); 162 } 163 164 static u32 tsk_peer_node(struct tipc_sock *tsk) 165 { 166 return msg_destnode(&tsk->phdr); 167 } 168 169 static u32 tsk_peer_port(struct tipc_sock *tsk) 170 { 171 return msg_destport(&tsk->phdr); 172 } 173 174 static bool tsk_unreliable(struct tipc_sock *tsk) 175 { 176 return msg_src_droppable(&tsk->phdr) != 0; 177 } 178 179 static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable) 180 { 181 msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0); 182 } 183 184 static bool tsk_unreturnable(struct tipc_sock *tsk) 185 { 186 return msg_dest_droppable(&tsk->phdr) != 0; 187 } 188 189 static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable) 190 { 191 msg_set_dest_droppable(&tsk->phdr, unreturnable ? 
1 : 0); 192 } 193 194 static int tsk_importance(struct tipc_sock *tsk) 195 { 196 return msg_importance(&tsk->phdr); 197 } 198 199 static struct tipc_sock *tipc_sk(const struct sock *sk) 200 { 201 return container_of(sk, struct tipc_sock, sk); 202 } 203 204 int tsk_set_importance(struct sock *sk, int imp) 205 { 206 if (imp > TIPC_CRITICAL_IMPORTANCE) 207 return -EINVAL; 208 msg_set_importance(&tipc_sk(sk)->phdr, (u32)imp); 209 return 0; 210 } 211 212 static bool tsk_conn_cong(struct tipc_sock *tsk) 213 { 214 return tsk->snt_unacked > tsk->snd_win; 215 } 216 217 static u16 tsk_blocks(int len) 218 { 219 return ((len / FLOWCTL_BLK_SZ) + 1); 220 } 221 222 /* tsk_blocks(): translate a buffer size in bytes to number of 223 * advertisable blocks, taking into account the ratio truesize(len)/len 224 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ 225 */ 226 static u16 tsk_adv_blocks(int len) 227 { 228 return len / FLOWCTL_BLK_SZ / 4; 229 } 230 231 /* tsk_inc(): increment counter for sent or received data 232 * - If block based flow control is not supported by peer we 233 * fall back to message based ditto, incrementing the counter 234 */ 235 static u16 tsk_inc(struct tipc_sock *tsk, int msglen) 236 { 237 if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)) 238 return ((msglen / FLOWCTL_BLK_SZ) + 1); 239 return 1; 240 } 241 242 /* tsk_set_nagle - enable/disable nagle property by manipulating maxnagle 243 */ 244 static void tsk_set_nagle(struct tipc_sock *tsk) 245 { 246 struct sock *sk = &tsk->sk; 247 248 tsk->maxnagle = 0; 249 if (sk->sk_type != SOCK_STREAM) 250 return; 251 if (tsk->nodelay) 252 return; 253 if (!(tsk->peer_caps & TIPC_NAGLE)) 254 return; 255 /* Limit node local buffer size to avoid receive queue overflow */ 256 if (tsk->max_pkt == MAX_MSG_SIZE) 257 tsk->maxnagle = 1500; 258 else 259 tsk->maxnagle = tsk->max_pkt; 260 } 261 262 /** 263 * tsk_advance_rx_queue - discard first buffer in socket receive queue 264 * 265 * Caller must hold socket 
lock 266 */ 267 static void tsk_advance_rx_queue(struct sock *sk) 268 { 269 trace_tipc_sk_advance_rx(sk, NULL, TIPC_DUMP_SK_RCVQ, " "); 270 kfree_skb(__skb_dequeue(&sk->sk_receive_queue)); 271 } 272 273 /* tipc_sk_respond() : send response message back to sender 274 */ 275 static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err) 276 { 277 u32 selector; 278 u32 dnode; 279 u32 onode = tipc_own_addr(sock_net(sk)); 280 281 if (!tipc_msg_reverse(onode, &skb, err)) 282 return; 283 284 trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_NONE, "@sk_respond!"); 285 dnode = msg_destnode(buf_msg(skb)); 286 selector = msg_origport(buf_msg(skb)); 287 tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector); 288 } 289 290 /** 291 * tsk_rej_rx_queue - reject all buffers in socket receive queue 292 * 293 * Caller must hold socket lock 294 */ 295 static void tsk_rej_rx_queue(struct sock *sk, int error) 296 { 297 struct sk_buff *skb; 298 299 while ((skb = __skb_dequeue(&sk->sk_receive_queue))) 300 tipc_sk_respond(sk, skb, error); 301 } 302 303 static bool tipc_sk_connected(struct sock *sk) 304 { 305 return sk->sk_state == TIPC_ESTABLISHED; 306 } 307 308 /* tipc_sk_type_connectionless - check if the socket is datagram socket 309 * @sk: socket 310 * 311 * Returns true if connection less, false otherwise 312 */ 313 static bool tipc_sk_type_connectionless(struct sock *sk) 314 { 315 return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM; 316 } 317 318 /* tsk_peer_msg - verify if message was sent by connected port's peer 319 * 320 * Handles cases where the node's network address has changed from 321 * the default of <0.0.0> to its configured setting. 
322 */ 323 static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) 324 { 325 struct sock *sk = &tsk->sk; 326 u32 self = tipc_own_addr(sock_net(sk)); 327 u32 peer_port = tsk_peer_port(tsk); 328 u32 orig_node, peer_node; 329 330 if (unlikely(!tipc_sk_connected(sk))) 331 return false; 332 333 if (unlikely(msg_origport(msg) != peer_port)) 334 return false; 335 336 orig_node = msg_orignode(msg); 337 peer_node = tsk_peer_node(tsk); 338 339 if (likely(orig_node == peer_node)) 340 return true; 341 342 if (!orig_node && peer_node == self) 343 return true; 344 345 if (!peer_node && orig_node == self) 346 return true; 347 348 return false; 349 } 350 351 /* tipc_set_sk_state - set the sk_state of the socket 352 * @sk: socket 353 * 354 * Caller must hold socket lock 355 * 356 * Returns 0 on success, errno otherwise 357 */ 358 static int tipc_set_sk_state(struct sock *sk, int state) 359 { 360 int oldsk_state = sk->sk_state; 361 int res = -EINVAL; 362 363 switch (state) { 364 case TIPC_OPEN: 365 res = 0; 366 break; 367 case TIPC_LISTEN: 368 case TIPC_CONNECTING: 369 if (oldsk_state == TIPC_OPEN) 370 res = 0; 371 break; 372 case TIPC_ESTABLISHED: 373 if (oldsk_state == TIPC_CONNECTING || 374 oldsk_state == TIPC_OPEN) 375 res = 0; 376 break; 377 case TIPC_DISCONNECTING: 378 if (oldsk_state == TIPC_CONNECTING || 379 oldsk_state == TIPC_ESTABLISHED) 380 res = 0; 381 break; 382 } 383 384 if (!res) 385 sk->sk_state = state; 386 387 return res; 388 } 389 390 static int tipc_sk_sock_err(struct socket *sock, long *timeout) 391 { 392 struct sock *sk = sock->sk; 393 int err = sock_error(sk); 394 int typ = sock->type; 395 396 if (err) 397 return err; 398 if (typ == SOCK_STREAM || typ == SOCK_SEQPACKET) { 399 if (sk->sk_state == TIPC_DISCONNECTING) 400 return -EPIPE; 401 else if (!tipc_sk_connected(sk)) 402 return -ENOTCONN; 403 } 404 if (!*timeout) 405 return -EAGAIN; 406 if (signal_pending(current)) 407 return sock_intr_errno(*timeout); 408 409 return 0; 410 } 411 412 #define 
tipc_wait_for_cond(sock_, timeo_, condition_) \ 413 ({ \ 414 DEFINE_WAIT_FUNC(wait_, woken_wake_function); \ 415 struct sock *sk_; \ 416 int rc_; \ 417 \ 418 while ((rc_ = !(condition_))) { \ 419 /* coupled with smp_wmb() in tipc_sk_proto_rcv() */ \ 420 smp_rmb(); \ 421 sk_ = (sock_)->sk; \ 422 rc_ = tipc_sk_sock_err((sock_), timeo_); \ 423 if (rc_) \ 424 break; \ 425 add_wait_queue(sk_sleep(sk_), &wait_); \ 426 release_sock(sk_); \ 427 *(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \ 428 sched_annotate_sleep(); \ 429 lock_sock(sk_); \ 430 remove_wait_queue(sk_sleep(sk_), &wait_); \ 431 } \ 432 rc_; \ 433 }) 434 435 /** 436 * tipc_sk_create - create a TIPC socket 437 * @net: network namespace (must be default network) 438 * @sock: pre-allocated socket structure 439 * @protocol: protocol indicator (must be 0) 440 * @kern: caused by kernel or by userspace? 441 * 442 * This routine creates additional data structures used by the TIPC socket, 443 * initializes them, and links them together. 
444 * 445 * Returns 0 on success, errno otherwise 446 */ 447 static int tipc_sk_create(struct net *net, struct socket *sock, 448 int protocol, int kern) 449 { 450 const struct proto_ops *ops; 451 struct sock *sk; 452 struct tipc_sock *tsk; 453 struct tipc_msg *msg; 454 455 /* Validate arguments */ 456 if (unlikely(protocol != 0)) 457 return -EPROTONOSUPPORT; 458 459 switch (sock->type) { 460 case SOCK_STREAM: 461 ops = &stream_ops; 462 break; 463 case SOCK_SEQPACKET: 464 ops = &packet_ops; 465 break; 466 case SOCK_DGRAM: 467 case SOCK_RDM: 468 ops = &msg_ops; 469 break; 470 default: 471 return -EPROTOTYPE; 472 } 473 474 /* Allocate socket's protocol area */ 475 sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern); 476 if (sk == NULL) 477 return -ENOMEM; 478 479 tsk = tipc_sk(sk); 480 tsk->max_pkt = MAX_PKT_DEFAULT; 481 tsk->maxnagle = 0; 482 tsk->nagle_start = NAGLE_START_INIT; 483 INIT_LIST_HEAD(&tsk->publications); 484 INIT_LIST_HEAD(&tsk->cong_links); 485 msg = &tsk->phdr; 486 487 /* Finish initializing socket data structures */ 488 sock->ops = ops; 489 sock_init_data(sock, sk); 490 tipc_set_sk_state(sk, TIPC_OPEN); 491 if (tipc_sk_insert(tsk)) { 492 pr_warn("Socket create failed; port number exhausted\n"); 493 return -EINVAL; 494 } 495 496 /* Ensure tsk is visible before we read own_addr. 
*/ 497 smp_mb(); 498 499 tipc_msg_init(tipc_own_addr(net), msg, TIPC_LOW_IMPORTANCE, 500 TIPC_NAMED_MSG, NAMED_H_SIZE, 0); 501 502 msg_set_origport(msg, tsk->portid); 503 timer_setup(&sk->sk_timer, tipc_sk_timeout, 0); 504 sk->sk_shutdown = 0; 505 sk->sk_backlog_rcv = tipc_sk_backlog_rcv; 506 sk->sk_rcvbuf = sysctl_tipc_rmem[1]; 507 sk->sk_data_ready = tipc_data_ready; 508 sk->sk_write_space = tipc_write_space; 509 sk->sk_destruct = tipc_sock_destruct; 510 tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; 511 tsk->group_is_open = true; 512 atomic_set(&tsk->dupl_rcvcnt, 0); 513 514 /* Start out with safe limits until we receive an advertised window */ 515 tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN); 516 tsk->rcv_win = tsk->snd_win; 517 518 if (tipc_sk_type_connectionless(sk)) { 519 tsk_set_unreturnable(tsk, true); 520 if (sock->type == SOCK_DGRAM) 521 tsk_set_unreliable(tsk, true); 522 } 523 __skb_queue_head_init(&tsk->mc_method.deferredq); 524 trace_tipc_sk_create(sk, NULL, TIPC_DUMP_NONE, " "); 525 return 0; 526 } 527 528 static void tipc_sk_callback(struct rcu_head *head) 529 { 530 struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu); 531 532 sock_put(&tsk->sk); 533 } 534 535 /* Caller should hold socket lock for the socket. 
*/ 536 static void __tipc_shutdown(struct socket *sock, int error) 537 { 538 struct sock *sk = sock->sk; 539 struct tipc_sock *tsk = tipc_sk(sk); 540 struct net *net = sock_net(sk); 541 long timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT); 542 u32 dnode = tsk_peer_node(tsk); 543 struct sk_buff *skb; 544 545 /* Avoid that hi-prio shutdown msgs bypass msgs in link wakeup queue */ 546 tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt && 547 !tsk_conn_cong(tsk))); 548 549 /* Push out delayed messages if in Nagle mode */ 550 tipc_sk_push_backlog(tsk, false); 551 /* Remove pending SYN */ 552 __skb_queue_purge(&sk->sk_write_queue); 553 554 /* Remove partially received buffer if any */ 555 skb = skb_peek(&sk->sk_receive_queue); 556 if (skb && TIPC_SKB_CB(skb)->bytes_read) { 557 __skb_unlink(skb, &sk->sk_receive_queue); 558 kfree_skb(skb); 559 } 560 561 /* Reject all unreceived messages if connectionless */ 562 if (tipc_sk_type_connectionless(sk)) { 563 tsk_rej_rx_queue(sk, error); 564 return; 565 } 566 567 switch (sk->sk_state) { 568 case TIPC_CONNECTING: 569 case TIPC_ESTABLISHED: 570 tipc_set_sk_state(sk, TIPC_DISCONNECTING); 571 tipc_node_remove_conn(net, dnode, tsk->portid); 572 /* Send a FIN+/- to its peer */ 573 skb = __skb_dequeue(&sk->sk_receive_queue); 574 if (skb) { 575 __skb_queue_purge(&sk->sk_receive_queue); 576 tipc_sk_respond(sk, skb, error); 577 break; 578 } 579 skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, 580 TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, 581 tsk_own_node(tsk), tsk_peer_port(tsk), 582 tsk->portid, error); 583 if (skb) 584 tipc_node_xmit_skb(net, skb, dnode, tsk->portid); 585 break; 586 case TIPC_LISTEN: 587 /* Reject all SYN messages */ 588 tsk_rej_rx_queue(sk, error); 589 break; 590 default: 591 __skb_queue_purge(&sk->sk_receive_queue); 592 break; 593 } 594 } 595 596 /** 597 * tipc_release - destroy a TIPC socket 598 * @sock: socket to destroy 599 * 600 * This routine cleans up any messages that are still queued on the socket. 
601 * For DGRAM and RDM socket types, all queued messages are rejected. 602 * For SEQPACKET and STREAM socket types, the first message is rejected 603 * and any others are discarded. (If the first message on a STREAM socket 604 * is partially-read, it is discarded and the next one is rejected instead.) 605 * 606 * NOTE: Rejected messages are not necessarily returned to the sender! They 607 * are returned or discarded according to the "destination droppable" setting 608 * specified for the message by the sender. 609 * 610 * Returns 0 on success, errno otherwise 611 */ 612 static int tipc_release(struct socket *sock) 613 { 614 struct sock *sk = sock->sk; 615 struct tipc_sock *tsk; 616 617 /* 618 * Exit if socket isn't fully initialized (occurs when a failed accept() 619 * releases a pre-allocated child socket that was never used) 620 */ 621 if (sk == NULL) 622 return 0; 623 624 tsk = tipc_sk(sk); 625 lock_sock(sk); 626 627 trace_tipc_sk_release(sk, NULL, TIPC_DUMP_ALL, " "); 628 __tipc_shutdown(sock, TIPC_ERR_NO_PORT); 629 sk->sk_shutdown = SHUTDOWN_MASK; 630 tipc_sk_leave(tsk); 631 tipc_sk_withdraw(tsk, 0, NULL); 632 __skb_queue_purge(&tsk->mc_method.deferredq); 633 sk_stop_timer(sk, &sk->sk_timer); 634 tipc_sk_remove(tsk); 635 636 sock_orphan(sk); 637 /* Reject any messages that accumulated in backlog queue */ 638 release_sock(sk); 639 tipc_dest_list_purge(&tsk->cong_links); 640 tsk->cong_link_cnt = 0; 641 call_rcu(&tsk->rcu, tipc_sk_callback); 642 sock->sk = NULL; 643 644 return 0; 645 } 646 647 /** 648 * tipc_bind - associate or disassocate TIPC name(s) with a socket 649 * @sock: socket structure 650 * @uaddr: socket address describing name(s) and desired operation 651 * @uaddr_len: size of socket address data structure 652 * 653 * Name and name sequence binding is indicated using a positive scope value; 654 * a negative scope value unbinds the specified name. Specifying no name 655 * (i.e. a socket address length of 0) unbinds all names from the socket. 
656 * 657 * Returns 0 on success, errno otherwise 658 * 659 * NOTE: This routine doesn't need to take the socket lock since it doesn't 660 * access any non-constant socket information. 661 */ 662 static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, 663 int uaddr_len) 664 { 665 struct sock *sk = sock->sk; 666 struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; 667 struct tipc_sock *tsk = tipc_sk(sk); 668 int res = -EINVAL; 669 670 lock_sock(sk); 671 if (unlikely(!uaddr_len)) { 672 res = tipc_sk_withdraw(tsk, 0, NULL); 673 goto exit; 674 } 675 if (tsk->group) { 676 res = -EACCES; 677 goto exit; 678 } 679 if (uaddr_len < sizeof(struct sockaddr_tipc)) { 680 res = -EINVAL; 681 goto exit; 682 } 683 if (addr->family != AF_TIPC) { 684 res = -EAFNOSUPPORT; 685 goto exit; 686 } 687 688 if (addr->addrtype == TIPC_ADDR_NAME) 689 addr->addr.nameseq.upper = addr->addr.nameseq.lower; 690 else if (addr->addrtype != TIPC_ADDR_NAMESEQ) { 691 res = -EAFNOSUPPORT; 692 goto exit; 693 } 694 695 if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && 696 (addr->addr.nameseq.type != TIPC_TOP_SRV) && 697 (addr->addr.nameseq.type != TIPC_CFG_SRV)) { 698 res = -EACCES; 699 goto exit; 700 } 701 702 res = (addr->scope >= 0) ? 703 tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) : 704 tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq); 705 exit: 706 release_sock(sk); 707 return res; 708 } 709 710 /** 711 * tipc_getname - get port ID of socket or peer socket 712 * @sock: socket structure 713 * @uaddr: area for returned socket address 714 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID 715 * 716 * Returns 0 on success, errno otherwise 717 * 718 * NOTE: This routine doesn't need to take the socket lock since it only 719 * accesses socket information that is unchanging (or which changes in 720 * a completely predictable manner). 
721 */ 722 static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, 723 int peer) 724 { 725 struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; 726 struct sock *sk = sock->sk; 727 struct tipc_sock *tsk = tipc_sk(sk); 728 729 memset(addr, 0, sizeof(*addr)); 730 if (peer) { 731 if ((!tipc_sk_connected(sk)) && 732 ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING))) 733 return -ENOTCONN; 734 addr->addr.id.ref = tsk_peer_port(tsk); 735 addr->addr.id.node = tsk_peer_node(tsk); 736 } else { 737 addr->addr.id.ref = tsk->portid; 738 addr->addr.id.node = tipc_own_addr(sock_net(sk)); 739 } 740 741 addr->addrtype = TIPC_ADDR_ID; 742 addr->family = AF_TIPC; 743 addr->scope = 0; 744 addr->addr.name.domain = 0; 745 746 return sizeof(*addr); 747 } 748 749 /** 750 * tipc_poll - read and possibly block on pollmask 751 * @file: file structure associated with the socket 752 * @sock: socket for which to calculate the poll bits 753 * @wait: ??? 754 * 755 * Returns pollmask value 756 * 757 * COMMENTARY: 758 * It appears that the usual socket locking mechanisms are not useful here 759 * since the pollmask info is potentially out-of-date the moment this routine 760 * exits. TCP and other protocols seem to rely on higher level poll routines 761 * to handle any preventable race conditions, so TIPC will do the same ... 762 * 763 * IMPORTANT: The fact that a read or write operation is indicated does NOT 764 * imply that the operation will succeed, merely that it should be performed 765 * and will not block. 
766 */ 767 static __poll_t tipc_poll(struct file *file, struct socket *sock, 768 poll_table *wait) 769 { 770 struct sock *sk = sock->sk; 771 struct tipc_sock *tsk = tipc_sk(sk); 772 __poll_t revents = 0; 773 774 sock_poll_wait(file, sock, wait); 775 trace_tipc_sk_poll(sk, NULL, TIPC_DUMP_ALL, " "); 776 777 if (sk->sk_shutdown & RCV_SHUTDOWN) 778 revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 779 if (sk->sk_shutdown == SHUTDOWN_MASK) 780 revents |= EPOLLHUP; 781 782 switch (sk->sk_state) { 783 case TIPC_ESTABLISHED: 784 if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) 785 revents |= EPOLLOUT; 786 /* fall through */ 787 case TIPC_LISTEN: 788 case TIPC_CONNECTING: 789 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 790 revents |= EPOLLIN | EPOLLRDNORM; 791 break; 792 case TIPC_OPEN: 793 if (tsk->group_is_open && !tsk->cong_link_cnt) 794 revents |= EPOLLOUT; 795 if (!tipc_sk_type_connectionless(sk)) 796 break; 797 if (skb_queue_empty_lockless(&sk->sk_receive_queue)) 798 break; 799 revents |= EPOLLIN | EPOLLRDNORM; 800 break; 801 case TIPC_DISCONNECTING: 802 revents = EPOLLIN | EPOLLRDNORM | EPOLLHUP; 803 break; 804 } 805 return revents; 806 } 807 808 /** 809 * tipc_sendmcast - send multicast message 810 * @sock: socket structure 811 * @seq: destination address 812 * @msg: message to send 813 * @dlen: length of data to send 814 * @timeout: timeout to wait for wakeup 815 * 816 * Called from function tipc_sendmsg(), which has done all sanity checks 817 * Returns the number of bytes sent on success, or errno 818 */ 819 static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, 820 struct msghdr *msg, size_t dlen, long timeout) 821 { 822 struct sock *sk = sock->sk; 823 struct tipc_sock *tsk = tipc_sk(sk); 824 struct tipc_msg *hdr = &tsk->phdr; 825 struct net *net = sock_net(sk); 826 int mtu = tipc_bcast_get_mtu(net); 827 struct tipc_mc_method *method = &tsk->mc_method; 828 struct sk_buff_head pkts; 829 struct tipc_nlist dsts; 830 int rc; 831 832 if 
(tsk->group) 833 return -EACCES; 834 835 /* Block or return if any destination link is congested */ 836 rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt); 837 if (unlikely(rc)) 838 return rc; 839 840 /* Lookup destination nodes */ 841 tipc_nlist_init(&dsts, tipc_own_addr(net)); 842 tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower, 843 seq->upper, &dsts); 844 if (!dsts.local && !dsts.remote) 845 return -EHOSTUNREACH; 846 847 /* Build message header */ 848 msg_set_type(hdr, TIPC_MCAST_MSG); 849 msg_set_hdr_sz(hdr, MCAST_H_SIZE); 850 msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE); 851 msg_set_destport(hdr, 0); 852 msg_set_destnode(hdr, 0); 853 msg_set_nametype(hdr, seq->type); 854 msg_set_namelower(hdr, seq->lower); 855 msg_set_nameupper(hdr, seq->upper); 856 857 /* Build message as chain of buffers */ 858 __skb_queue_head_init(&pkts); 859 rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts); 860 861 /* Send message if build was successful */ 862 if (unlikely(rc == dlen)) { 863 trace_tipc_sk_sendmcast(sk, skb_peek(&pkts), 864 TIPC_DUMP_SK_SNDQ, " "); 865 rc = tipc_mcast_xmit(net, &pkts, method, &dsts, 866 &tsk->cong_link_cnt); 867 } 868 869 tipc_nlist_purge(&dsts); 870 871 return rc ? 
rc : dlen; 872 } 873 874 /** 875 * tipc_send_group_msg - send a message to a member in the group 876 * @net: network namespace 877 * @m: message to send 878 * @mb: group member 879 * @dnode: destination node 880 * @dport: destination port 881 * @dlen: total length of message data 882 */ 883 static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk, 884 struct msghdr *m, struct tipc_member *mb, 885 u32 dnode, u32 dport, int dlen) 886 { 887 u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group); 888 struct tipc_mc_method *method = &tsk->mc_method; 889 int blks = tsk_blocks(GROUP_H_SIZE + dlen); 890 struct tipc_msg *hdr = &tsk->phdr; 891 struct sk_buff_head pkts; 892 int mtu, rc; 893 894 /* Complete message header */ 895 msg_set_type(hdr, TIPC_GRP_UCAST_MSG); 896 msg_set_hdr_sz(hdr, GROUP_H_SIZE); 897 msg_set_destport(hdr, dport); 898 msg_set_destnode(hdr, dnode); 899 msg_set_grp_bc_seqno(hdr, bc_snd_nxt); 900 901 /* Build message as chain of buffers */ 902 __skb_queue_head_init(&pkts); 903 mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false); 904 rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); 905 if (unlikely(rc != dlen)) 906 return rc; 907 908 /* Send message */ 909 rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); 910 if (unlikely(rc == -ELINKCONG)) { 911 tipc_dest_push(&tsk->cong_links, dnode, 0); 912 tsk->cong_link_cnt++; 913 } 914 915 /* Update send window */ 916 tipc_group_update_member(mb, blks); 917 918 /* A broadcast sent within next EXPIRE period must follow same path */ 919 method->rcast = true; 920 method->mandatory = true; 921 return dlen; 922 } 923 924 /** 925 * tipc_send_group_unicast - send message to a member in the group 926 * @sock: socket structure 927 * @m: message to send 928 * @dlen: total length of message data 929 * @timeout: timeout to wait for wakeup 930 * 931 * Called from function tipc_sendmsg(), which has done all sanity checks 932 * Returns the number of bytes sent on success, or errno 933 */ 934 static int 
tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
			int dlen, long timeout)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
	struct tipc_member *mb = NULL;
	u32 node, port;
	int rc;

	node = dest->addr.id.node;
	port = dest->addr.id.ref;
	if (!port && !node)
		return -EHOSTUNREACH;

	/* Block or return if destination link or member is congested */
	rc = tipc_wait_for_cond(sock, &timeout,
				!tipc_dest_find(&tsk->cong_links, node, 0) &&
				tsk->group &&
				!tipc_group_cong(tsk->group, node, port, blks,
						 &mb));
	if (unlikely(rc))
		return rc;

	if (unlikely(!mb))
		return -EHOSTUNREACH;

	rc = tipc_send_group_msg(net, tsk, m, mb, node, port, dlen);

	return rc ? rc : dlen;
}

/**
 * tipc_send_group_anycast - send message to any member with given identity
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * The destination instance is taken from the address in @m->msg_name; the
 * name type and lookup scope come from the socket's preformatted header.
 * At most three lookup rounds are made before giving up.
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
				   int dlen, long timeout)
{
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct list_head *cong_links = &tsk->cong_links;
	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_msg *hdr = &tsk->phdr;
	struct tipc_member *first = NULL;
	struct tipc_member *mbr = NULL;
	struct net *net = sock_net(sk);
	u32 node, port, exclude;
	struct list_head dsts;
	u32 type, inst, scope;
	int lookups = 0;
	int dstcnt, rc;
	bool cong;

	INIT_LIST_HEAD(&dsts);

	type = msg_nametype(hdr);
	inst = dest->addr.name.name.instance;
	scope = msg_lookup_scope(hdr);

	/* 'lookups' is pre-incremented, so this body runs at most 3 times */
	while (++lookups < 4) {
		exclude = tipc_group_exclude(tsk->group);

		first = NULL;

		/* Look for a non-congested destination member, if any */
		while (1) {
			if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
						 &dstcnt, exclude, false))
				return -EHOSTUNREACH;
			tipc_dest_pop(&dsts, &node, &port);
			cong = tipc_group_cong(tsk->group, node, port, blks,
					       &mbr);
			if (!cong)
				break;
			/* Same congested member came back: stop cycling */
			if (mbr == first)
				break;
			/* Remember the first congested member seen */
			if (!first)
				first = mbr;
		}

		/* Start over if destination was not in member list */
		if (unlikely(!mbr))
			continue;

		/* Fast path: neither the member nor its link is congested */
		if (likely(!cong && !tipc_dest_find(cong_links, node, 0)))
			break;

		/* Block or return if destination link or member is congested */
		rc = tipc_wait_for_cond(sock, &timeout,
					!tipc_dest_find(cong_links, node, 0) &&
					tsk->group &&
					!tipc_group_cong(tsk->group, node, port,
							 blks, &mbr));
		if (unlikely(rc))
			return rc;

		/* Send, unless destination disappeared while waiting */
		if (likely(mbr))
			break;
	}

	/* All lookup rounds used up without finding a usable member */
	if (unlikely(lookups >= 4))
		return -EHOSTUNREACH;

	rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen);

	return rc ? rc : dlen;
}

/**
 * tipc_send_group_bcast - send message to all members in communication group
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * If @m carries a destination address the message is sent as a group
 * multicast (TIPC_GRP_MCAST_MSG) tagged with that name instance; without an
 * address it is sent as a group broadcast (TIPC_GRP_BCAST_MSG).
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
				 int dlen, long timeout)
{
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_nlist *dsts;
	struct tipc_mc_method *method = &tsk->mc_method;
	/* Request an ack only when replicast has been made mandatory */
	bool ack = method->mandatory && method->rcast;
	int blks = tsk_blocks(MCAST_H_SIZE + dlen);
	struct tipc_msg *hdr = &tsk->phdr;
	int mtu = tipc_bcast_get_mtu(net);
	struct sk_buff_head pkts;
	/* NOTE(review): this initial value is overwritten before any use */
	int rc = -EHOSTUNREACH;

	/* Block or return if any destination link or member is congested */
	rc = tipc_wait_for_cond(sock, &timeout,
				!tsk->cong_link_cnt && tsk->group &&
				!tipc_group_bc_cong(tsk->group, blks));
	if (unlikely(rc))
		return rc;

	/* Nothing to do if the group has neither local nor remote members */
	dsts = tipc_group_dests(tsk->group);
	if (!dsts->local && !dsts->remote)
		return -EHOSTUNREACH;

	/* Complete message header */
	if (dest) {
		msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
		msg_set_nameinst(hdr, dest->addr.name.name.instance);
	} else {
		msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
		msg_set_nameinst(hdr, 0);
	}
	msg_set_hdr_sz(hdr, GROUP_H_SIZE);
	msg_set_destport(hdr, 0);
	msg_set_destnode(hdr, 0);
	msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(tsk->group));

	/* Avoid getting stuck with repeated forced replicasts */
	msg_set_grp_bc_ack_req(hdr, ack);

	/* Build message as chain of buffers */
	__skb_queue_head_init(&pkts);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	/* Send message */
	rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt);
	if (unlikely(rc))
		return rc;

	/* Update broadcast sequence number and send windows */
	tipc_group_update_bc_members(tsk->group, blks, ack);

	/* Broadcast link is now free to choose method for next broadcast */
	method->mandatory = false;
	method->expires = jiffies;

	return dlen;
}

/**
 * tipc_send_group_mcast - send message to all members with given identity
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Looks up all group members matching the name instance in @m->msg_name.
 * With exactly one match the message is redirected to the unicast path;
 * otherwise it is handed to tipc_send_group_bcast().
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
				 int dlen, long timeout)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	u32 type, inst, scope, exclude;
	struct list_head dsts;
	u32 dstcnt;

	INIT_LIST_HEAD(&dsts);

	type = msg_nametype(hdr);
	inst = dest->addr.name.name.instance;
	scope = msg_lookup_scope(hdr);
	exclude = tipc_group_exclude(grp);

	if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
				 &dstcnt, exclude, true))
		return -EHOSTUNREACH;

	/* Single match: overwrite the caller's address with the member's
	 * node/port identity and send it as a group unicast instead.
	 */
	if (dstcnt == 1) {
		tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref);
		return tipc_send_group_unicast(sock, m, dlen, timeout);
	}

	/* The lookup result is only needed for the count; release it */
	tipc_dest_list_purge(&dsts);
	return tipc_send_group_bcast(sock, m, dlen, timeout);
}

/**
 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
 * @net: network namespace the queues belong to
 * @arrvq: queue with arriving messages, to be cloned after destination lookup
 * @inputq: queue with cloned messages, delivered to socket after dest lookup
 *
 * Multi-threaded: parallel calls with reference to same queues may occur
 */
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
		       struct sk_buff_head *inputq)
{
	u32 self = tipc_own_addr(net);
	u32 type, lower, upper, scope;
	struct sk_buff *skb, *_skb;
	u32 portid, onode;
	struct sk_buff_head tmpq;
	struct list_head dports;
	struct tipc_msg *hdr;
	int user, mtyp, hlen;
	bool exact;

	__skb_queue_head_init(&tmpq);
	INIT_LIST_HEAD(&dports);

	skb = tipc_skb_peek(arrvq, &inputq->lock);
	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
		hdr = buf_msg(skb);
		user = msg_user(hdr);
		mtyp = msg_type(hdr);
		hlen = skb_headroom(skb) + msg_hdr_sz(hdr);
		onode = msg_orignode(hdr);
		type = msg_nametype(hdr);

		/* Group unicast and group protocol messages are not cloned:
		 * move the buffer itself to inputq, unless another thread
		 * has already taken it off the head of arrvq.
		 */
		if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
			spin_lock_bh(&inputq->lock);
			if (skb_peek(arrvq) == skb) {
				__skb_dequeue(arrvq);
				__skb_queue_tail(inputq, skb);
			}
			/* presumably drops the reference taken by
			 * tipc_skb_peek() - verify against its definition
			 */
			kfree_skb(skb);
			spin_unlock_bh(&inputq->lock);
			continue;
		}

		/* Group messages require exact scope match */
		if (msg_in_group(hdr)) {
			lower = 0;
			upper = ~0;
			scope = msg_lookup_scope(hdr);
			exact = true;
		} else {
			/* TIPC_NODE_SCOPE means "any scope" in this context */
			if (onode == self)
				scope = TIPC_NODE_SCOPE;
			else
				scope = TIPC_CLUSTER_SCOPE;
			exact = false;
			lower = msg_namelower(hdr);
			upper = msg_nameupper(hdr);
		}

		/* Create destination port list: */
		tipc_nametbl_mc_lookup(net, type, lower, upper,
				       scope, exact, &dports);

		/* Clone message per destination */
		while (tipc_dest_pop(&dports, NULL, &portid)) {
			_skb = __pskb_copy(skb, hlen, GFP_ATOMIC);
			if (_skb) {
				msg_set_destport(buf_msg(_skb), portid);
				__skb_queue_tail(&tmpq, _skb);
				continue;
			}
			pr_warn("Failed to clone mcast rcv buffer\n");
		}
		/* Append to inputq if not already done by other thread */
		spin_lock_bh(&inputq->lock);
		if (skb_peek(arrvq) == skb) {
			skb_queue_splice_tail_init(&tmpq, inputq);
			kfree_skb(__skb_dequeue(arrvq));
		}
		spin_unlock_bh(&inputq->lock);
		/* Clones are discarded here if they were not spliced above */
		__skb_queue_purge(&tmpq);
		kfree_skb(skb);
	}
	tipc_sk_rcv(net, inputq);
}

/* tipc_sk_push_backlog(): send accumulated buffers in socket write queue
 * when socket is in Nagle mode
 *
 * @tsk: sending socket
 * @nagle_ack: true when called because a Nagle ACK arrived; the accumulation
 *             statistics are then evaluated and reset before sending
 */
static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack)
{
	struct sk_buff_head *txq = &tsk->sk.sk_write_queue;
	struct sk_buff *skb = skb_peek_tail(txq);
	struct net *net = sock_net(&tsk->sk);
	u32 dnode = tsk_peer_node(tsk);
	int rc;

	if (nagle_ack) {
		tsk->pkt_cnt += skb_queue_len(txq);
		/* Fewer than two accumulated messages per packet on average:
		 * treat the bundling as ineffective and double the start
		 * threshold, up to NAGLE_START_MAX.
		 */
		if (!tsk->pkt_cnt || tsk->msg_acc / tsk->pkt_cnt < 2) {
			tsk->oneway = 0;
			if (tsk->nagle_start < NAGLE_START_MAX)
				tsk->nagle_start *= 2;
			tsk->expect_ack = false;
			pr_debug("tsk %10u: bad nagle %u -> %u, next start %u!\n",
				 tsk->portid, tsk->msg_acc, tsk->pkt_cnt,
				 tsk->nagle_start);
		} else {
			tsk->nagle_start = NAGLE_START_INIT;
			/* Ask for an ack on the last queued buffer, if any */
			if (skb) {
				msg_set_ack_required(buf_msg(skb));
				tsk->expect_ack = true;
			} else {
				tsk->expect_ack = false;
			}
		}
		tsk->msg_acc = 0;
		tsk->pkt_cnt = 0;
	}

	/* Nothing queued, or the link is still congested */
	if (!skb || tsk->cong_link_cnt)
		return;

	/* Do not send SYN again after congestion */
	if (msg_is_syn(buf_msg(skb)))
		return;

	if (tsk->msg_acc)
		tsk->pkt_cnt += skb_queue_len(txq);
	/* The backlog is now in flight: account it as sent-but-unacked */
	tsk->snt_unacked += tsk->snd_backlog;
	tsk->snd_backlog = 0;
	rc = tipc_node_xmit(net, txq, dnode, tsk->portid);
	if (rc == -ELINKCONG)
		tsk->cong_link_cnt = 1;
}

/**
 * tipc_sk_conn_proto_rcv - receive a connection mng protocol message
 * @tsk: receiving socket
 * @skb: pointer to message buffer.
 * @inputq: queue that receives @skb when it is converted to an abort message
 * @xmitq: queue that receives the reply to a CONN_PROBE
 *
 * @skb is consumed on every path: it is either queued on @inputq or @xmitq,
 * or freed.
 */
static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
				   struct sk_buff_head *inputq,
				   struct sk_buff_head *xmitq)
{
	struct tipc_msg *hdr = buf_msg(skb);
	u32 onode = tsk_own_node(tsk);
	struct sock *sk = &tsk->sk;
	int mtyp = msg_type(hdr);
	bool was_cong;

	/* Ignore if connection cannot be validated: */
	if (!tsk_peer_msg(tsk, hdr)) {
		trace_tipc_sk_drop_msg(sk, skb, TIPC_DUMP_NONE, "@proto_rcv!");
		goto exit;
	}

	if (unlikely(msg_errcode(hdr))) {
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
		tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
				      tsk_peer_port(tsk));
		sk->sk_state_change(sk);

		/* State change is ignored if socket already awake,
		 * - convert msg to abort msg and add to inqueue
		 */
		msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE);
		msg_set_type(hdr, TIPC_CONN_MSG);
		msg_set_size(hdr, BASIC_H_SIZE);
		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
		__skb_queue_tail(inputq, skb);
		return;
	}

	/* Any validated, non-error protocol message clears the probe flag */
	tsk->probe_unacked = false;

	if (mtyp == CONN_PROBE) {
		/* Turn the probe around and send it back as the reply */
		msg_set_type(hdr, CONN_PROBE_REPLY);
		if (tipc_msg_reverse(onode, &skb, TIPC_OK))
			__skb_queue_tail(xmitq, skb);
		return;
	} else if (mtyp == CONN_ACK) {
		was_cong = tsk_conn_cong(tsk);
		tipc_sk_push_backlog(tsk, msg_nagle_ack(hdr));
		tsk->snt_unacked -= msg_conn_ack(hdr);
		if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
			tsk->snd_win = msg_adv_win(hdr);
		/* Wake up writers only on a congested -> uncongested change */
		if (was_cong && !tsk_conn_cong(tsk))
			sk->sk_write_space(sk);
	} else if (mtyp != CONN_PROBE_REPLY) {
		pr_warn("Received unknown CONN_PROTO msg\n");
	}
exit:
	kfree_skb(skb);
}

/**
 * tipc_sendmsg - send message in connectionless manner
 * @sock: socket structure
 * @m: message to send
 * @dsz: amount of user data to be sent
 *
 * Message must have a destination specified explicitly.
 * Used for SOCK_RDM and SOCK_DGRAM messages,
 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
 *
 * Takes the socket lock around __tipc_sendmsg().
 *
 * Returns the number of bytes sent on success, or errno otherwise
 */
static int tipc_sendmsg(struct socket *sock,
			struct msghdr *m, size_t dsz)
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
	ret = __tipc_sendmsg(sock, m, dsz);
	release_sock(sk);

	return ret;
}

/* __tipc_sendmsg - lock-free body of tipc_sendmsg()
 * @sock: socket structure
 * @m: message to send; @m->msg_name optionally holds the destination
 * @dlen: amount of user data to be sent
 *
 * Both callers visible here (tipc_sendmsg() and __tipc_sendstream() via
 * tipc_sendstream()) invoke it with the socket lock held.
 *
 * Returns the number of bytes sent on success, or errno otherwise
 */
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct list_head *clinks = &tsk->cong_links;
	/* On a connection-oriented socket this message acts as a SYN */
	bool syn = !tipc_sk_type_connectionless(sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_msg *hdr = &tsk->phdr;
	struct tipc_name_seq *seq;
	struct sk_buff_head pkts;
	u32 dport = 0, dnode = 0;
	u32 type = 0, inst = 0;
	int mtu, rc;

	if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
		return -EMSGSIZE;

	/* Validate an explicitly supplied destination address */
	if (likely(dest)) {
		if (unlikely(m->msg_namelen < sizeof(*dest)))
			return -EINVAL;
		if (unlikely(dest->family != AF_TIPC))
			return -EINVAL;
	}

	/* Group members use the dedicated group send paths */
	if (grp) {
		if (!dest)
			return tipc_send_group_bcast(sock, m, dlen, timeout);
		if (dest->addrtype == TIPC_ADDR_NAME)
			return tipc_send_group_anycast(sock, m, dlen, timeout);
		if (dest->addrtype == TIPC_ADDR_ID)
			return tipc_send_group_unicast(sock, m, dlen, timeout);
		if (dest->addrtype == TIPC_ADDR_MCAST)
			return tipc_send_group_mcast(sock, m, dlen, timeout);
		return -EINVAL;
	}

	/* No explicit destination: fall back to the stored peer address */
	if (unlikely(!dest)) {
		dest = &tsk->peer;
		if (!syn && dest->family != AF_TIPC)
			return -EDESTADDRREQ;
	}

	if (unlikely(syn)) {
		if (sk->sk_state == TIPC_LISTEN)
			return -EPIPE;
		if (sk->sk_state != TIPC_OPEN)
			return -EISCONN;
		if (tsk->published)
			return -EOPNOTSUPP;
		/* Remember the name the connection was set up towards */
		if (dest->addrtype == TIPC_ADDR_NAME) {
			tsk->conn_type = dest->addr.name.name.type;
			tsk->conn_instance = dest->addr.name.name.instance;
		}
		msg_set_syn(hdr, 1);
	}

	seq = &dest->addr.nameseq;
	if (dest->addrtype == TIPC_ADDR_MCAST)
		return tipc_sendmcast(sock, seq, m, dlen, timeout);

	if (dest->addrtype == TIPC_ADDR_NAME) {
		type = dest->addr.name.name.type;
		inst = dest->addr.name.name.instance;
		dnode = dest->addr.name.domain;
		dport = tipc_nametbl_translate(net, type, inst, &dnode);
		if (unlikely(!dport && !dnode))
			return -EHOSTUNREACH;
	} else if (dest->addrtype == TIPC_ADDR_ID) {
		dnode = dest->addr.id.node;
	} else {
		return -EINVAL;
	}

	/* Block or return if destination link is congested */
	rc = tipc_wait_for_cond(sock, &timeout,
				!tipc_dest_find(clinks, dnode, 0));
	if (unlikely(rc))
		return rc;

	if (dest->addrtype == TIPC_ADDR_NAME) {
		msg_set_type(hdr, TIPC_NAMED_MSG);
		msg_set_hdr_sz(hdr, NAMED_H_SIZE);
		msg_set_nametype(hdr, type);
		msg_set_nameinst(hdr, inst);
		msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dport);
	} else { /* TIPC_ADDR_ID */
		msg_set_type(hdr, TIPC_DIRECT_MSG);
		msg_set_lookup_scope(hdr, 0);
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dest->addr.id.ref);
		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
	}

	__skb_queue_head_init(&pkts);
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid, true);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;
	/* Keep a clone of the SYN on the write queue; tipc_sk_filter_connect()
	 * checks that queue before scheduling a new setup attempt.
	 */
	if (unlikely(syn && !tipc_msg_skb_clone(&pkts, &sk->sk_write_queue))) {
		__skb_queue_purge(&pkts);
		return -ENOMEM;
	}

	trace_tipc_sk_sendmsg(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " ");
	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
	/* Link congestion is recorded, not reported as a send failure */
	if (unlikely(rc == -ELINKCONG)) {
		tipc_dest_push(clinks, dnode, 0);
		tsk->cong_link_cnt++;
		rc = 0;
	}

	if (unlikely(syn && !rc))
		tipc_set_sk_state(sk, TIPC_CONNECTING);

	return rc ? rc : dlen;
}

/**
 * tipc_sendstream - send stream-oriented data
 * @sock: socket structure
 * @m: data to send
 * @dsz: total length of data to be transmitted
 *
 * Used for SOCK_STREAM data.
 *
 * Takes the socket lock around __tipc_sendstream().
 *
 * Returns the number of bytes sent on success (or partial success),
 * or errno if no data sent
 */
static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz)
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
	ret = __tipc_sendstream(sock, m, dsz);
	release_sock(sk);

	return ret;
}

/* __tipc_sendstream - lock-free body of tipc_sendstream()
 * @sock: socket structure
 * @m: data to send
 * @dlen: total length of data to be transmitted
 *
 * Data is sent in portions of at most TIPC_MAX_USER_MSG_SIZE bytes. Small
 * portions may be appended to the write queue (Nagle) instead of being built
 * as separate messages.
 *
 * Returns the number of bytes sent on success (or partial success),
 * or errno if no data sent
 */
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct sk_buff_head *txq = &sk->sk_write_queue;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	struct sk_buff *skb;
	u32 dnode = tsk_peer_node(tsk);
	int maxnagle = tsk->maxnagle;
	int maxpkt = tsk->max_pkt;
	int send, sent = 0;
	int blocks, rc = 0;

	if (unlikely(dlen > INT_MAX))
		return -EMSGSIZE;

	/* Handle implicit connection setup */
	if (unlikely(dest)) {
		rc = __tipc_sendmsg(sock, m, dlen);
		/* All data went out with the SYN: prime the send accounting */
		if (dlen && dlen == rc) {
			tsk->peer_caps = tipc_node_get_capabilities(net, dnode);
			tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr));
		}
		return rc;
	}

	do {
		/* Wait until connected and neither link nor peer is congested */
		rc = tipc_wait_for_cond(sock, &timeout,
					(!tsk->cong_link_cnt &&
					 !tsk_conn_cong(tsk) &&
					 tipc_sk_connected(sk)));
		if (unlikely(rc))
			break;
		send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
		blocks = tsk->snd_backlog;
		/* Nagle path: enough consecutive sends without incoming
		 * traffic, and the portion is small enough to be bundled.
		 */
		if (tsk->oneway++ >= tsk->nagle_start && maxnagle &&
		    send <= maxnagle) {
			rc = tipc_msg_append(hdr, m, send, maxnagle, txq);
			if (unlikely(rc < 0))
				break;
			blocks += rc;
			tsk->msg_acc++;
			if (blocks <= 64 && tsk->expect_ack) {
				/* Keep accumulating until the ack arrives */
				tsk->snd_backlog = blocks;
				sent += send;
				break;
			} else if (blocks > 64) {
				tsk->pkt_cnt += skb_queue_len(txq);
			} else {
				/* No ack outstanding: request one on the
				 * last queued buffer and restart counting.
				 */
				skb = skb_peek_tail(txq);
				if (skb) {
					msg_set_ack_required(buf_msg(skb));
					tsk->expect_ack = true;
				} else {
					tsk->expect_ack = false;
				}
				tsk->msg_acc = 0;
				tsk->pkt_cnt = 0;
			}
		} else {
			rc = tipc_msg_build(hdr, m, sent, send, maxpkt, txq);
			if (unlikely(rc != send))
				break;
			blocks += tsk_inc(tsk, send + MIN_H_SIZE);
		}
		trace_tipc_sk_sendstream(sk, skb_peek(txq),
					 TIPC_DUMP_SK_SNDQ, " ");
		rc = tipc_node_xmit(net, txq, dnode, tsk->portid);
		/* Link congestion is recorded, not reported as a failure */
		if (unlikely(rc == -ELINKCONG)) {
			tsk->cong_link_cnt = 1;
			rc = 0;
		}
		if (likely(!rc)) {
			tsk->snt_unacked += blocks;
			tsk->snd_backlog = 0;
			sent += send;
		}
	} while (sent < dlen && !rc);

	/* Partial success is reported as the byte count, not as an error */
	return sent ? sent : rc;
}

/**
 * tipc_send_packet - send a connection-oriented message
 * @sock: socket structure
 * @m: message to send
 * @dsz: length of data to be transmitted
 *
 * Used for SOCK_SEQPACKET messages.
 *
 * Returns the number of bytes sent on success, or errno otherwise
 */
static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
{
	/* A packet must fit in a single TIPC message */
	if (dsz > TIPC_MAX_USER_MSG_SIZE)
		return -EMSGSIZE;

	return tipc_sendstream(sock, m, dsz);
}

/* tipc_sk_finish_conn - complete the setup of a connection
 *
 * @tsk: socket being connected
 * @peer_port: port identity of the peer socket
 * @peer_node: node identity of the peer socket
 *
 * Rewrites the preformatted header for connection traffic, starts the probe
 * timer, moves the socket to TIPC_ESTABLISHED and drops whatever is left on
 * the write queue.
 */
static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
				u32 peer_node)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_msg *msg = &tsk->phdr;

	msg_set_syn(msg, 0);
	msg_set_destnode(msg, peer_node);
	msg_set_destport(msg, peer_port);
	msg_set_type(msg, TIPC_CONN_MSG);
	msg_set_lookup_scope(msg, 0);
	msg_set_hdr_sz(msg, SHORT_H_SIZE);

	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
	tipc_set_sk_state(sk, TIPC_ESTABLISHED);
	tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
	tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid, true);
	tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
	tsk_set_nagle(tsk);
	__skb_queue_purge(&sk->sk_write_queue);
	/* Peer supports block based flow control: keep current windows */
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
		return;

	/* Fall back to message based flow control */
	tsk->rcv_win = FLOWCTL_MSG_WIN;
	tsk->snd_win = FLOWCTL_MSG_WIN;
}

/**
 * tipc_sk_set_orig_addr - capture sender's address for received message
 * @m: descriptor for message info
 * @skb: received message
 *
 * Note: Address is not captured if not requested by receiver.
 */
static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
{
	DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name);
	struct tipc_msg *hdr = buf_msg(skb);

	/* Receiver did not supply an address buffer */
	if (!srcaddr)
		return;

	srcaddr->sock.family = AF_TIPC;
	srcaddr->sock.addrtype = TIPC_ADDR_ID;
	srcaddr->sock.scope = 0;
	srcaddr->sock.addr.id.ref = msg_origport(hdr);
	srcaddr->sock.addr.id.node = msg_orignode(hdr);
	/* Also clear the name view's 'domain' member of the address union */
	srcaddr->sock.addr.name.domain = 0;
	m->msg_namelen = sizeof(struct sockaddr_tipc);

	if (!msg_in_group(hdr))
		return;

	/* Group message users may also want to know sending member's id */
	srcaddr->member.family = AF_TIPC;
	srcaddr->member.addrtype = TIPC_ADDR_NAME;
	srcaddr->member.scope = 0;
	srcaddr->member.addr.name.name.type = msg_nametype(hdr);
	srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member;
	srcaddr->member.addr.name.domain = 0;
	/* Both addresses are returned: report the size of the whole pair */
	m->msg_namelen = sizeof(*srcaddr);
}

/**
 * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
 * @m: descriptor for message info
 * @skb: received message buffer
 * @tsk: TIPC port associated with message
 *
 * Up to three control messages are emitted at level SOL_TIPC:
 * TIPC_ERRINFO (error code and returned data size), TIPC_RETDATA (the
 * returned data itself) and TIPC_DESTNAME (name the message was sent to).
 *
 * Note: Ancillary data is not captured if not requested by receiver.
 *
 * Returns 0 if successful, otherwise errno
 */
static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb,
				 struct tipc_sock *tsk)
{
	struct tipc_msg *msg;
	u32 anc_data[3];
	u32 err;
	u32 dest_type;
	int has_name;
	int res;

	if (likely(m->msg_controllen == 0))
		return 0;
	msg = buf_msg(skb);

	/* Optionally capture errored message object(s) */
	err = msg ? msg_errcode(msg) : 0;
	if (unlikely(err)) {
		anc_data[0] = err;
		anc_data[1] = msg_data_sz(msg);
		/* 8 bytes: the two u32 values filled in above */
		res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
		if (res)
			return res;
		if (anc_data[1]) {
			if (skb_linearize(skb))
				return -ENOMEM;
			/* Re-read the header pointer after linearizing */
			msg = buf_msg(skb);
			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
				       msg_data(msg));
			if (res)
				return res;
		}
	}

	/* Optionally capture message destination object */
	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
	switch (dest_type) {
	case TIPC_NAMED_MSG:
		has_name = 1;
		anc_data[0] = msg_nametype(msg);
		/* Single name: lower and upper bound are the same value */
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_namelower(msg);
		break;
	case TIPC_MCAST_MSG:
		has_name = 1;
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_nameupper(msg);
		break;
	case TIPC_CONN_MSG:
		/* Only reported if the connection was set up using a name */
		has_name = (tsk->conn_type != 0);
		anc_data[0] = tsk->conn_type;
		anc_data[1] = tsk->conn_instance;
		anc_data[2] = tsk->conn_instance;
		break;
	default:
		has_name = 0;
	}
	if (has_name) {
		/* 12 bytes: all three u32 values of anc_data */
		res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
		if (res)
			return res;
	}

	return 0;
}

/* tipc_sk_build_ack - build a CONN_ACK message for the peer
 * @tsk: connected socket
 *
 * The message acknowledges tsk->rcv_unacked, which is then reset to zero.
 * With block based flow control the receive window is recalculated from
 * sk_rcvbuf and advertised in the same message.
 *
 * Returns the new buffer, or NULL if the socket is not connected or the
 * message could not be created
 */
static struct sk_buff *tipc_sk_build_ack(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct sk_buff *skb = NULL;
	struct tipc_msg *msg;
	u32 peer_port = tsk_peer_port(tsk);
	u32 dnode = tsk_peer_node(tsk);

	if (!tipc_sk_connected(sk))
		return NULL;
	skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
			      dnode, tsk_own_node(tsk), peer_port,
			      tsk->portid, TIPC_OK);
	if (!skb)
		return NULL;
	msg = buf_msg(skb);
	msg_set_conn_ack(msg, tsk->rcv_unacked);
	tsk->rcv_unacked = 0;

	/* Adjust to and advertise the correct window limit */
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
		tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
		msg_set_adv_win(msg, tsk->rcv_win);
	}
	return skb;
}

/* tipc_sk_send_ack - build a CONN_ACK and transmit it to the peer node
 * @tsk: connected socket
 *
 * Does nothing if the ack message cannot be built.
 */
static void tipc_sk_send_ack(struct tipc_sock *tsk)
{
	struct sk_buff *skb;

	skb = tipc_sk_build_ack(tsk);
	if (!skb)
		return;

	tipc_node_xmit_skb(sock_net(&tsk->sk), skb, tsk_peer_node(tsk),
			   msg_link_selector(buf_msg(skb)));
}

/* tipc_wait_for_rcvmsg - wait until the receive queue is non-empty
 * @sock: socket structure
 * @timeop: in: maximum time to wait; out: time remaining
 *
 * The socket lock is released while sleeping and re-taken afterwards, so the
 * caller must hold it on entry (both callers visible here do).
 *
 * Returns 0 when a message is available, -ENOTCONN if receive has been shut
 * down, -EAGAIN when the timeout is used up, the sock_intr_errno() value if
 * a signal is pending, or a pending socket error
 */
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	long timeo = *timeop;
	int err = sock_error(sk);

	if (err)
		return err;

	for (;;) {
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			if (sk->sk_shutdown & RCV_SHUTDOWN) {
				err = -ENOTCONN;
				break;
			}
			add_wait_queue(sk_sleep(sk), &wait);
			release_sock(sk);
			timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
			sched_annotate_sleep();
			lock_sock(sk);
			remove_wait_queue(sk_sleep(sk), &wait);
		}
		/* Each 'err' below is the result if the following test hits */
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;

		err = sock_error(sk);
		if (err)
			break;
	}
	*timeop = timeo;
	return err;
}

/**
 * tipc_recvmsg - receive packet-oriented message
 * @sock: socket structure
 * @m: descriptor for message info
 * @buflen: length of user buffer area
 * @flags: receive flags
 *
 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 * If the complete message doesn't fit in user area, truncate it.
1872 * 1873 * Returns size of returned message data, errno otherwise 1874 */ 1875 static int tipc_recvmsg(struct socket *sock, struct msghdr *m, 1876 size_t buflen, int flags) 1877 { 1878 struct sock *sk = sock->sk; 1879 bool connected = !tipc_sk_type_connectionless(sk); 1880 struct tipc_sock *tsk = tipc_sk(sk); 1881 int rc, err, hlen, dlen, copy; 1882 struct sk_buff_head xmitq; 1883 struct tipc_msg *hdr; 1884 struct sk_buff *skb; 1885 bool grp_evt; 1886 long timeout; 1887 1888 /* Catch invalid receive requests */ 1889 if (unlikely(!buflen)) 1890 return -EINVAL; 1891 1892 lock_sock(sk); 1893 if (unlikely(connected && sk->sk_state == TIPC_OPEN)) { 1894 rc = -ENOTCONN; 1895 goto exit; 1896 } 1897 timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1898 1899 /* Step rcv queue to first msg with data or error; wait if necessary */ 1900 do { 1901 rc = tipc_wait_for_rcvmsg(sock, &timeout); 1902 if (unlikely(rc)) 1903 goto exit; 1904 skb = skb_peek(&sk->sk_receive_queue); 1905 hdr = buf_msg(skb); 1906 dlen = msg_data_sz(hdr); 1907 hlen = msg_hdr_sz(hdr); 1908 err = msg_errcode(hdr); 1909 grp_evt = msg_is_grp_evt(hdr); 1910 if (likely(dlen || err)) 1911 break; 1912 tsk_advance_rx_queue(sk); 1913 } while (1); 1914 1915 /* Collect msg meta data, including error code and rejected data */ 1916 tipc_sk_set_orig_addr(m, skb); 1917 rc = tipc_sk_anc_data_recv(m, skb, tsk); 1918 if (unlikely(rc)) 1919 goto exit; 1920 hdr = buf_msg(skb); 1921 1922 /* Capture data if non-error msg, otherwise just set return value */ 1923 if (likely(!err)) { 1924 copy = min_t(int, dlen, buflen); 1925 if (unlikely(copy != dlen)) 1926 m->msg_flags |= MSG_TRUNC; 1927 rc = skb_copy_datagram_msg(skb, hlen, m, copy); 1928 } else { 1929 copy = 0; 1930 rc = 0; 1931 if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) 1932 rc = -ECONNRESET; 1933 } 1934 if (unlikely(rc)) 1935 goto exit; 1936 1937 /* Mark message as group event if applicable */ 1938 if (unlikely(grp_evt)) { 1939 if (msg_grp_evt(hdr) == 
TIPC_WITHDRAWN) 1940 m->msg_flags |= MSG_EOR; 1941 m->msg_flags |= MSG_OOB; 1942 copy = 0; 1943 } 1944 1945 /* Caption of data or error code/rejected data was successful */ 1946 if (unlikely(flags & MSG_PEEK)) 1947 goto exit; 1948 1949 /* Send group flow control advertisement when applicable */ 1950 if (tsk->group && msg_in_group(hdr) && !grp_evt) { 1951 __skb_queue_head_init(&xmitq); 1952 tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen), 1953 msg_orignode(hdr), msg_origport(hdr), 1954 &xmitq); 1955 tipc_node_distr_xmit(sock_net(sk), &xmitq); 1956 } 1957 1958 tsk_advance_rx_queue(sk); 1959 1960 if (likely(!connected)) 1961 goto exit; 1962 1963 /* Send connection flow control advertisement when applicable */ 1964 tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); 1965 if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) 1966 tipc_sk_send_ack(tsk); 1967 exit: 1968 release_sock(sk); 1969 return rc ? rc : copy; 1970 } 1971 1972 /** 1973 * tipc_recvstream - receive stream-oriented data 1974 * @m: descriptor for message info 1975 * @buflen: total size of user buffer area 1976 * @flags: receive flags 1977 * 1978 * Used for SOCK_STREAM messages only. If not enough data is available 1979 * will optionally wait for more; never truncates data. 
 *
 * Returns size of returned message data, errno otherwise
 */
static int tipc_recvstream(struct socket *sock, struct msghdr *m,
			   size_t buflen, int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sk_buff *skb;
	struct tipc_msg *hdr;
	struct tipc_skb_cb *skb_cb;
	bool peek = flags & MSG_PEEK;
	int offset, required, copy, copied = 0;
	int hlen, dlen, err, rc;
	long timeout;

	/* Catch invalid receive attempts */
	if (unlikely(!buflen))
		return -EINVAL;

	lock_sock(sk);

	if (unlikely(sk->sk_state == TIPC_OPEN)) {
		rc = -ENOTCONN;
		goto exit;
	}
	/* 'required' is the amount that must be copied before returning */
	required = sock_rcvlowat(sk, flags & MSG_WAITALL, buflen);
	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		/* Look at first msg in receive queue; wait if necessary */
		rc = tipc_wait_for_rcvmsg(sock, &timeout);
		if (unlikely(rc))
			break;
		skb = skb_peek(&sk->sk_receive_queue);
		skb_cb = TIPC_SKB_CB(skb);
		hdr = buf_msg(skb);
		dlen = msg_data_sz(hdr);
		hlen = msg_hdr_sz(hdr);
		err = msg_errcode(hdr);

		/* Discard any empty non-errored (SYN-) message */
		if (unlikely(!dlen && !err)) {
			tsk_advance_rx_queue(sk);
			continue;
		}

		/* Collect msg meta data, incl. error code and rejected data */
		if (!copied) {
			tipc_sk_set_orig_addr(m, skb);
			rc = tipc_sk_anc_data_recv(m, skb, tsk);
			if (rc)
				break;
			/* Reload: the buffer may have been linearized */
			hdr = buf_msg(skb);
		}

		/* Copy data if msg ok, otherwise return error/partial data */
		if (likely(!err)) {
			/* Resume where a previous partial read stopped */
			offset = skb_cb->bytes_read;
			copy = min_t(int, dlen - offset, buflen - copied);
			rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
			if (unlikely(rc))
				break;
			copied += copy;
			offset += copy;
			/* Message not fully consumed: remember the position
			 * (unless peeking) and leave it on the queue.
			 */
			if (unlikely(offset < dlen)) {
				if (!peek)
					skb_cb->bytes_read = offset;
				break;
			}
		} else {
			rc = 0;
			if ((err != TIPC_CONN_SHUTDOWN) && !m->msg_control)
				rc = -ECONNRESET;
			/* Return data already copied before reporting this */
			if (copied || rc)
				break;
		}

		if (unlikely(peek))
			break;

		tsk_advance_rx_queue(sk);

		/* Send connection flow control advertisement when applicable */
		tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
		if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
			tipc_sk_send_ack(tsk);

		/* Exit if all requested data or FIN/error received */
		if (copied == buflen || err)
			break;

	} while (!skb_queue_empty(&sk->sk_receive_queue) || copied < required);
exit:
	release_sock(sk);
	/* Copied data takes precedence over a late error code */
	return copied ? copied : rc;
}

/**
 * tipc_write_space - wake up thread if port congestion is released
 * @sk: socket
 *
 * Wakes sleepers on the socket wait queue with the write-ready poll flags.
 */
static void tipc_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
						EPOLLWRNORM | EPOLLWRBAND);
	rcu_read_unlock();
}

/**
 * tipc_data_ready - wake up threads to indicate messages have been received
 * @sk: socket
 *
 * Wakes sleepers on the socket wait queue with the read-ready poll flags.
 */
static void tipc_data_ready(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
						EPOLLRDNORM | EPOLLRDBAND);
	rcu_read_unlock();
}

/* tipc_sock_destruct - free any buffers still on the receive queue
 * @sk: socket
 */
static void tipc_sock_destruct(struct sock *sk)
{
	__skb_queue_purge(&sk->sk_receive_queue);
}

/* tipc_sk_proto_rcv - handle the protocol message at the head of @inputq
 * @sk: receiving socket
 * @inputq: queue the message is taken from; the connection manager and
 *          group handlers may add messages to it
 * @xmitq: queue for messages the handlers want transmitted
 *
 * The message is dispatched on its user field and freed afterwards, except
 * for CONN_MANAGER messages, which tipc_sk_conn_proto_rcv() consumes itself.
 */
static void tipc_sk_proto_rcv(struct sock *sk,
			      struct sk_buff_head *inputq,
			      struct sk_buff_head *xmitq)
{
	struct sk_buff *skb = __skb_dequeue(inputq);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);
	struct tipc_group *grp = tsk->group;
	bool wakeup = false;

	switch (msg_user(hdr)) {
	case CONN_MANAGER:
		tipc_sk_conn_proto_rcv(tsk, skb, inputq, xmitq);
		return;
	case SOCK_WAKEUP:
		/* The link to the originating node is no longer congested */
		tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
		/* coupled with smp_rmb() in tipc_wait_for_cond() */
		smp_wmb();
		tsk->cong_link_cnt--;
		wakeup = true;
		tipc_sk_push_backlog(tsk, false);
		break;
	case GROUP_PROTOCOL:
		tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
		break;
	case TOP_SRV:
		tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf,
				      hdr, inputq, xmitq);
		break;
	default:
		break;
	}

	if (wakeup)
		sk->sk_write_space(sk);
2150 2151 kfree_skb(skb); 2152 } 2153 2154 /** 2155 * tipc_sk_filter_connect - check incoming message for a connection-based socket 2156 * @tsk: TIPC socket 2157 * @skb: pointer to message buffer. 2158 * @xmitq: for Nagle ACK if any 2159 * Returns true if message should be added to receive queue, false otherwise 2160 */ 2161 static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb, 2162 struct sk_buff_head *xmitq) 2163 { 2164 struct sock *sk = &tsk->sk; 2165 struct net *net = sock_net(sk); 2166 struct tipc_msg *hdr = buf_msg(skb); 2167 bool con_msg = msg_connected(hdr); 2168 u32 pport = tsk_peer_port(tsk); 2169 u32 pnode = tsk_peer_node(tsk); 2170 u32 oport = msg_origport(hdr); 2171 u32 onode = msg_orignode(hdr); 2172 int err = msg_errcode(hdr); 2173 unsigned long delay; 2174 2175 if (unlikely(msg_mcast(hdr))) 2176 return false; 2177 tsk->oneway = 0; 2178 2179 switch (sk->sk_state) { 2180 case TIPC_CONNECTING: 2181 /* Setup ACK */ 2182 if (likely(con_msg)) { 2183 if (err) 2184 break; 2185 tipc_sk_finish_conn(tsk, oport, onode); 2186 msg_set_importance(&tsk->phdr, msg_importance(hdr)); 2187 /* ACK+ message with data is added to receive queue */ 2188 if (msg_data_sz(hdr)) 2189 return true; 2190 /* Empty ACK-, - wake up sleeping connect() and drop */ 2191 sk->sk_state_change(sk); 2192 msg_set_dest_droppable(hdr, 1); 2193 return false; 2194 } 2195 /* Ignore connectionless message if not from listening socket */ 2196 if (oport != pport || onode != pnode) 2197 return false; 2198 2199 /* Rejected SYN */ 2200 if (err != TIPC_ERR_OVERLOAD) 2201 break; 2202 2203 /* Prepare for new setup attempt if we have a SYN clone */ 2204 if (skb_queue_empty(&sk->sk_write_queue)) 2205 break; 2206 get_random_bytes(&delay, 2); 2207 delay %= (tsk->conn_timeout / 4); 2208 delay = msecs_to_jiffies(delay + 100); 2209 sk_reset_timer(sk, &sk->sk_timer, jiffies + delay); 2210 return false; 2211 case TIPC_OPEN: 2212 case TIPC_DISCONNECTING: 2213 return false; 2214 case 
TIPC_LISTEN: 2215 /* Accept only SYN message */ 2216 if (!msg_is_syn(hdr) && 2217 tipc_node_get_capabilities(net, onode) & TIPC_SYN_BIT) 2218 return false; 2219 if (!con_msg && !err) 2220 return true; 2221 return false; 2222 case TIPC_ESTABLISHED: 2223 if (!skb_queue_empty(&sk->sk_write_queue)) 2224 tipc_sk_push_backlog(tsk, false); 2225 /* Accept only connection-based messages sent by peer */ 2226 if (likely(con_msg && !err && pport == oport && 2227 pnode == onode)) { 2228 if (msg_ack_required(hdr)) { 2229 struct sk_buff *skb; 2230 2231 skb = tipc_sk_build_ack(tsk); 2232 if (skb) { 2233 msg_set_nagle_ack(buf_msg(skb)); 2234 __skb_queue_tail(xmitq, skb); 2235 } 2236 } 2237 return true; 2238 } 2239 if (!tsk_peer_msg(tsk, hdr)) 2240 return false; 2241 if (!err) 2242 return true; 2243 tipc_set_sk_state(sk, TIPC_DISCONNECTING); 2244 tipc_node_remove_conn(net, pnode, tsk->portid); 2245 sk->sk_state_change(sk); 2246 return true; 2247 default: 2248 pr_err("Unknown sk_state %u\n", sk->sk_state); 2249 } 2250 /* Abort connection setup attempt */ 2251 tipc_set_sk_state(sk, TIPC_DISCONNECTING); 2252 sk->sk_err = ECONNREFUSED; 2253 sk->sk_state_change(sk); 2254 return true; 2255 } 2256 2257 /** 2258 * rcvbuf_limit - get proper overload limit of socket receive queue 2259 * @sk: socket 2260 * @skb: message 2261 * 2262 * For connection oriented messages, irrespective of importance, 2263 * default queue limit is 2 MB. 
2264 * 2265 * For connectionless messages, queue limits are based on message 2266 * importance as follows: 2267 * 2268 * TIPC_LOW_IMPORTANCE (2 MB) 2269 * TIPC_MEDIUM_IMPORTANCE (4 MB) 2270 * TIPC_HIGH_IMPORTANCE (8 MB) 2271 * TIPC_CRITICAL_IMPORTANCE (16 MB) 2272 * 2273 * Returns overload limit according to corresponding message importance 2274 */ 2275 static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) 2276 { 2277 struct tipc_sock *tsk = tipc_sk(sk); 2278 struct tipc_msg *hdr = buf_msg(skb); 2279 2280 if (unlikely(msg_in_group(hdr))) 2281 return READ_ONCE(sk->sk_rcvbuf); 2282 2283 if (unlikely(!msg_connected(hdr))) 2284 return READ_ONCE(sk->sk_rcvbuf) << msg_importance(hdr); 2285 2286 if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)) 2287 return READ_ONCE(sk->sk_rcvbuf); 2288 2289 return FLOWCTL_MSG_LIM; 2290 } 2291 2292 /** 2293 * tipc_sk_filter_rcv - validate incoming message 2294 * @sk: socket 2295 * @skb: pointer to message. 2296 * 2297 * Enqueues message on receive queue if acceptable; optionally handles 2298 * disconnect indication for a connected socket. 
 *
 * Rejected messages are reversed and appended to @xmitq; nothing is
 * transmitted from within this function.
 *
 * Called with socket lock already taken
 *
 */
static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
			       struct sk_buff_head *xmitq)
{
	bool sk_conn = !tipc_sk_type_connectionless(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_msg *hdr = buf_msg(skb);
	struct net *net = sock_net(sk);
	struct sk_buff_head inputq;
	int mtyp = msg_type(hdr);
	int limit, err = TIPC_OK;

	trace_tipc_sk_filter_rcv(sk, skb, TIPC_DUMP_ALL, " ");
	TIPC_SKB_CB(skb)->bytes_read = 0;
	/* Work on a private queue that initially holds only this buffer;
	 * the filters below may consume it or leave other buffers there.
	 */
	__skb_queue_head_init(&inputq);
	__skb_queue_tail(&inputq, skb);

	/* Non-data messages are handed to the protocol handler */
	if (unlikely(!msg_isdata(hdr)))
		tipc_sk_proto_rcv(sk, &inputq, xmitq);

	if (unlikely(grp))
		tipc_group_filter_msg(grp, &inputq, xmitq);

	if (unlikely(!grp) && mtyp == TIPC_MCAST_MSG)
		tipc_mcast_filter_msg(net, &tsk->mc_method.deferredq, &inputq);

	/* Validate and add to receive buffer if there is space */
	while ((skb = __skb_dequeue(&inputq))) {
		hdr = buf_msg(skb);
		limit = rcvbuf_limit(sk, skb);
		/* Wrong kind of message for this socket => no such port */
		if ((sk_conn && !tipc_sk_filter_connect(tsk, skb, xmitq)) ||
		    (!sk_conn && msg_connected(hdr)) ||
		    (!grp && msg_in_group(hdr)))
			err = TIPC_ERR_NO_PORT;
		else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) {
			trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL,
					   "err_overload2!");
			atomic_inc(&sk->sk_drops);
			err = TIPC_ERR_OVERLOAD;
		}

		if (unlikely(err)) {
			/* Queue the reversed message, if one was produced */
			if (tipc_msg_reverse(tipc_own_addr(net), &skb, err)) {
				trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_NONE,
						      "@filter_rcv!");
				__skb_queue_tail(xmitq, skb);
			}
			/* Reset so the next buffer is judged on its own */
			err = TIPC_OK;
			continue;
		}
		__skb_queue_tail(&sk->sk_receive_queue, skb);
		skb_set_owner_r(skb, sk);
		trace_tipc_sk_overlimit2(sk, skb, TIPC_DUMP_ALL,
					 "rcvq >90% allocated!");
		sk->sk_data_ready(sk);
	}
}

/**
 *
tipc_sk_backlog_rcv - handle incoming message from backlog queue 2363 * @sk: socket 2364 * @skb: message 2365 * 2366 * Caller must hold socket lock 2367 */ 2368 static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) 2369 { 2370 unsigned int before = sk_rmem_alloc_get(sk); 2371 struct sk_buff_head xmitq; 2372 unsigned int added; 2373 2374 __skb_queue_head_init(&xmitq); 2375 2376 tipc_sk_filter_rcv(sk, skb, &xmitq); 2377 added = sk_rmem_alloc_get(sk) - before; 2378 atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt); 2379 2380 /* Send pending response/rejected messages, if any */ 2381 tipc_node_distr_xmit(sock_net(sk), &xmitq); 2382 return 0; 2383 } 2384 2385 /** 2386 * tipc_sk_enqueue - extract all buffers with destination 'dport' from 2387 * inputq and try adding them to socket or backlog queue 2388 * @inputq: list of incoming buffers with potentially different destinations 2389 * @sk: socket where the buffers should be enqueued 2390 * @dport: port number for the socket 2391 * 2392 * Caller must hold socket lock 2393 */ 2394 static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, 2395 u32 dport, struct sk_buff_head *xmitq) 2396 { 2397 unsigned long time_limit = jiffies + 2; 2398 struct sk_buff *skb; 2399 unsigned int lim; 2400 atomic_t *dcnt; 2401 u32 onode; 2402 2403 while (skb_queue_len(inputq)) { 2404 if (unlikely(time_after_eq(jiffies, time_limit))) 2405 return; 2406 2407 skb = tipc_skb_dequeue(inputq, dport); 2408 if (unlikely(!skb)) 2409 return; 2410 2411 /* Add message directly to receive queue if possible */ 2412 if (!sock_owned_by_user(sk)) { 2413 tipc_sk_filter_rcv(sk, skb, xmitq); 2414 continue; 2415 } 2416 2417 /* Try backlog, compensating for double-counted bytes */ 2418 dcnt = &tipc_sk(sk)->dupl_rcvcnt; 2419 if (!sk->sk_backlog.len) 2420 atomic_set(dcnt, 0); 2421 lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); 2422 if (likely(!sk_add_backlog(sk, skb, lim))) { 2423 trace_tipc_sk_overlimit1(sk, skb, TIPC_DUMP_ALL, 2424 "bklg & 
rcvq >90% allocated!"); 2425 continue; 2426 } 2427 2428 trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL, "err_overload!"); 2429 /* Overload => reject message back to sender */ 2430 onode = tipc_own_addr(sock_net(sk)); 2431 atomic_inc(&sk->sk_drops); 2432 if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) { 2433 trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_ALL, 2434 "@sk_enqueue!"); 2435 __skb_queue_tail(xmitq, skb); 2436 } 2437 break; 2438 } 2439 } 2440 2441 /** 2442 * tipc_sk_rcv - handle a chain of incoming buffers 2443 * @inputq: buffer list containing the buffers 2444 * Consumes all buffers in list until inputq is empty 2445 * Note: may be called in multiple threads referring to the same queue 2446 */ 2447 void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) 2448 { 2449 struct sk_buff_head xmitq; 2450 u32 dnode, dport = 0; 2451 int err; 2452 struct tipc_sock *tsk; 2453 struct sock *sk; 2454 struct sk_buff *skb; 2455 2456 __skb_queue_head_init(&xmitq); 2457 while (skb_queue_len(inputq)) { 2458 dport = tipc_skb_peek_port(inputq, dport); 2459 tsk = tipc_sk_lookup(net, dport); 2460 2461 if (likely(tsk)) { 2462 sk = &tsk->sk; 2463 if (likely(spin_trylock_bh(&sk->sk_lock.slock))) { 2464 tipc_sk_enqueue(inputq, sk, dport, &xmitq); 2465 spin_unlock_bh(&sk->sk_lock.slock); 2466 } 2467 /* Send pending response/rejected messages, if any */ 2468 tipc_node_distr_xmit(sock_net(sk), &xmitq); 2469 sock_put(sk); 2470 continue; 2471 } 2472 /* No destination socket => dequeue skb if still there */ 2473 skb = tipc_skb_dequeue(inputq, dport); 2474 if (!skb) 2475 return; 2476 2477 /* Try secondary lookup if unresolved named message */ 2478 err = TIPC_ERR_NO_PORT; 2479 if (tipc_msg_lookup_dest(net, skb, &err)) 2480 goto xmit; 2481 2482 /* Prepare for message rejection */ 2483 if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err)) 2484 continue; 2485 2486 trace_tipc_sk_rej_msg(NULL, skb, TIPC_DUMP_NONE, "@sk_rcv!"); 2487 xmit: 2488 dnode = msg_destnode(buf_msg(skb)); 2489 
tipc_node_xmit_skb(net, skb, dnode, dport); 2490 } 2491 } 2492 2493 static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) 2494 { 2495 DEFINE_WAIT_FUNC(wait, woken_wake_function); 2496 struct sock *sk = sock->sk; 2497 int done; 2498 2499 do { 2500 int err = sock_error(sk); 2501 if (err) 2502 return err; 2503 if (!*timeo_p) 2504 return -ETIMEDOUT; 2505 if (signal_pending(current)) 2506 return sock_intr_errno(*timeo_p); 2507 if (sk->sk_state == TIPC_DISCONNECTING) 2508 break; 2509 2510 add_wait_queue(sk_sleep(sk), &wait); 2511 done = sk_wait_event(sk, timeo_p, tipc_sk_connected(sk), 2512 &wait); 2513 remove_wait_queue(sk_sleep(sk), &wait); 2514 } while (!done); 2515 return 0; 2516 } 2517 2518 static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr) 2519 { 2520 if (addr->family != AF_TIPC) 2521 return false; 2522 if (addr->addrtype == TIPC_SERVICE_RANGE) 2523 return (addr->addr.nameseq.lower <= addr->addr.nameseq.upper); 2524 return (addr->addrtype == TIPC_SERVICE_ADDR || 2525 addr->addrtype == TIPC_SOCKET_ADDR); 2526 } 2527 2528 /** 2529 * tipc_connect - establish a connection to another TIPC port 2530 * @sock: socket structure 2531 * @dest: socket address for destination port 2532 * @destlen: size of socket address data structure 2533 * @flags: file-related flags associated with socket 2534 * 2535 * Returns 0 on success, errno otherwise 2536 */ 2537 static int tipc_connect(struct socket *sock, struct sockaddr *dest, 2538 int destlen, int flags) 2539 { 2540 struct sock *sk = sock->sk; 2541 struct tipc_sock *tsk = tipc_sk(sk); 2542 struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest; 2543 struct msghdr m = {NULL,}; 2544 long timeout = (flags & O_NONBLOCK) ? 
0 : tsk->conn_timeout; 2545 int previous; 2546 int res = 0; 2547 2548 if (destlen != sizeof(struct sockaddr_tipc)) 2549 return -EINVAL; 2550 2551 lock_sock(sk); 2552 2553 if (tsk->group) { 2554 res = -EINVAL; 2555 goto exit; 2556 } 2557 2558 if (dst->family == AF_UNSPEC) { 2559 memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); 2560 if (!tipc_sk_type_connectionless(sk)) 2561 res = -EINVAL; 2562 goto exit; 2563 } 2564 if (!tipc_sockaddr_is_sane(dst)) { 2565 res = -EINVAL; 2566 goto exit; 2567 } 2568 /* DGRAM/RDM connect(), just save the destaddr */ 2569 if (tipc_sk_type_connectionless(sk)) { 2570 memcpy(&tsk->peer, dest, destlen); 2571 goto exit; 2572 } else if (dst->addrtype == TIPC_SERVICE_RANGE) { 2573 res = -EINVAL; 2574 goto exit; 2575 } 2576 2577 previous = sk->sk_state; 2578 2579 switch (sk->sk_state) { 2580 case TIPC_OPEN: 2581 /* Send a 'SYN-' to destination */ 2582 m.msg_name = dest; 2583 m.msg_namelen = destlen; 2584 2585 /* If connect is in non-blocking case, set MSG_DONTWAIT to 2586 * indicate send_msg() is never blocked. 2587 */ 2588 if (!timeout) 2589 m.msg_flags = MSG_DONTWAIT; 2590 2591 res = __tipc_sendmsg(sock, &m, 0); 2592 if ((res < 0) && (res != -EWOULDBLOCK)) 2593 goto exit; 2594 2595 /* Just entered TIPC_CONNECTING state; the only 2596 * difference is that return value in non-blocking 2597 * case is EINPROGRESS, rather than EALREADY. 
2598 */ 2599 res = -EINPROGRESS; 2600 /* fall through */ 2601 case TIPC_CONNECTING: 2602 if (!timeout) { 2603 if (previous == TIPC_CONNECTING) 2604 res = -EALREADY; 2605 goto exit; 2606 } 2607 timeout = msecs_to_jiffies(timeout); 2608 /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ 2609 res = tipc_wait_for_connect(sock, &timeout); 2610 break; 2611 case TIPC_ESTABLISHED: 2612 res = -EISCONN; 2613 break; 2614 default: 2615 res = -EINVAL; 2616 } 2617 2618 exit: 2619 release_sock(sk); 2620 return res; 2621 } 2622 2623 /** 2624 * tipc_listen - allow socket to listen for incoming connections 2625 * @sock: socket structure 2626 * @len: (unused) 2627 * 2628 * Returns 0 on success, errno otherwise 2629 */ 2630 static int tipc_listen(struct socket *sock, int len) 2631 { 2632 struct sock *sk = sock->sk; 2633 int res; 2634 2635 lock_sock(sk); 2636 res = tipc_set_sk_state(sk, TIPC_LISTEN); 2637 release_sock(sk); 2638 2639 return res; 2640 } 2641 2642 static int tipc_wait_for_accept(struct socket *sock, long timeo) 2643 { 2644 struct sock *sk = sock->sk; 2645 DEFINE_WAIT(wait); 2646 int err; 2647 2648 /* True wake-one mechanism for incoming connections: only 2649 * one process gets woken up, not the 'whole herd'. 2650 * Since we do not 'race & poll' for established sockets 2651 * anymore, the common case will execute the loop only once. 
2652 */ 2653 for (;;) { 2654 prepare_to_wait_exclusive(sk_sleep(sk), &wait, 2655 TASK_INTERRUPTIBLE); 2656 if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { 2657 release_sock(sk); 2658 timeo = schedule_timeout(timeo); 2659 lock_sock(sk); 2660 } 2661 err = 0; 2662 if (!skb_queue_empty(&sk->sk_receive_queue)) 2663 break; 2664 err = -EAGAIN; 2665 if (!timeo) 2666 break; 2667 err = sock_intr_errno(timeo); 2668 if (signal_pending(current)) 2669 break; 2670 } 2671 finish_wait(sk_sleep(sk), &wait); 2672 return err; 2673 } 2674 2675 /** 2676 * tipc_accept - wait for connection request 2677 * @sock: listening socket 2678 * @new_sock: new socket that is to be connected 2679 * @flags: file-related flags associated with socket 2680 * 2681 * Returns 0 on success, errno otherwise 2682 */ 2683 static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, 2684 bool kern) 2685 { 2686 struct sock *new_sk, *sk = sock->sk; 2687 struct sk_buff *buf; 2688 struct tipc_sock *new_tsock; 2689 struct tipc_msg *msg; 2690 long timeo; 2691 int res; 2692 2693 lock_sock(sk); 2694 2695 if (sk->sk_state != TIPC_LISTEN) { 2696 res = -EINVAL; 2697 goto exit; 2698 } 2699 timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 2700 res = tipc_wait_for_accept(sock, timeo); 2701 if (res) 2702 goto exit; 2703 2704 buf = skb_peek(&sk->sk_receive_queue); 2705 2706 res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern); 2707 if (res) 2708 goto exit; 2709 security_sk_clone(sock->sk, new_sock->sk); 2710 2711 new_sk = new_sock->sk; 2712 new_tsock = tipc_sk(new_sk); 2713 msg = buf_msg(buf); 2714 2715 /* we lock on new_sk; but lockdep sees the lock on sk */ 2716 lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING); 2717 2718 /* 2719 * Reject any stray messages received by new socket 2720 * before the socket lock was taken (very, very unlikely) 2721 */ 2722 tsk_rej_rx_queue(new_sk, TIPC_ERR_NO_PORT); 2723 2724 /* Connect new socket to it's peer */ 2725 tipc_sk_finish_conn(new_tsock, msg_origport(msg), 
msg_orignode(msg)); 2726 2727 tsk_set_importance(new_sk, msg_importance(msg)); 2728 if (msg_named(msg)) { 2729 new_tsock->conn_type = msg_nametype(msg); 2730 new_tsock->conn_instance = msg_nameinst(msg); 2731 } 2732 2733 /* 2734 * Respond to 'SYN-' by discarding it & returning 'ACK'-. 2735 * Respond to 'SYN+' by queuing it on new socket. 2736 */ 2737 if (!msg_data_sz(msg)) { 2738 struct msghdr m = {NULL,}; 2739 2740 tsk_advance_rx_queue(sk); 2741 __tipc_sendstream(new_sock, &m, 0); 2742 } else { 2743 __skb_dequeue(&sk->sk_receive_queue); 2744 __skb_queue_head(&new_sk->sk_receive_queue, buf); 2745 skb_set_owner_r(buf, new_sk); 2746 } 2747 release_sock(new_sk); 2748 exit: 2749 release_sock(sk); 2750 return res; 2751 } 2752 2753 /** 2754 * tipc_shutdown - shutdown socket connection 2755 * @sock: socket structure 2756 * @how: direction to close (must be SHUT_RDWR) 2757 * 2758 * Terminates connection (if necessary), then purges socket's receive queue. 2759 * 2760 * Returns 0 on success, errno otherwise 2761 */ 2762 static int tipc_shutdown(struct socket *sock, int how) 2763 { 2764 struct sock *sk = sock->sk; 2765 int res; 2766 2767 if (how != SHUT_RDWR) 2768 return -EINVAL; 2769 2770 lock_sock(sk); 2771 2772 trace_tipc_sk_shutdown(sk, NULL, TIPC_DUMP_ALL, " "); 2773 __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN); 2774 sk->sk_shutdown = SEND_SHUTDOWN; 2775 2776 if (sk->sk_state == TIPC_DISCONNECTING) { 2777 /* Discard any unreceived messages */ 2778 __skb_queue_purge(&sk->sk_receive_queue); 2779 2780 /* Wake up anyone sleeping in poll */ 2781 sk->sk_state_change(sk); 2782 res = 0; 2783 } else { 2784 res = -ENOTCONN; 2785 } 2786 2787 release_sock(sk); 2788 return res; 2789 } 2790 2791 static void tipc_sk_check_probing_state(struct sock *sk, 2792 struct sk_buff_head *list) 2793 { 2794 struct tipc_sock *tsk = tipc_sk(sk); 2795 u32 pnode = tsk_peer_node(tsk); 2796 u32 pport = tsk_peer_port(tsk); 2797 u32 self = tsk_own_node(tsk); 2798 u32 oport = tsk->portid; 2799 struct sk_buff 
*skb; 2800 2801 if (tsk->probe_unacked) { 2802 tipc_set_sk_state(sk, TIPC_DISCONNECTING); 2803 sk->sk_err = ECONNABORTED; 2804 tipc_node_remove_conn(sock_net(sk), pnode, pport); 2805 sk->sk_state_change(sk); 2806 return; 2807 } 2808 /* Prepare new probe */ 2809 skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, 2810 pnode, self, pport, oport, TIPC_OK); 2811 if (skb) 2812 __skb_queue_tail(list, skb); 2813 tsk->probe_unacked = true; 2814 sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); 2815 } 2816 2817 static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list) 2818 { 2819 struct tipc_sock *tsk = tipc_sk(sk); 2820 2821 /* Try again later if dest link is congested */ 2822 if (tsk->cong_link_cnt) { 2823 sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100)); 2824 return; 2825 } 2826 /* Prepare SYN for retransmit */ 2827 tipc_msg_skb_clone(&sk->sk_write_queue, list); 2828 } 2829 2830 static void tipc_sk_timeout(struct timer_list *t) 2831 { 2832 struct sock *sk = from_timer(sk, t, sk_timer); 2833 struct tipc_sock *tsk = tipc_sk(sk); 2834 u32 pnode = tsk_peer_node(tsk); 2835 struct sk_buff_head list; 2836 int rc = 0; 2837 2838 __skb_queue_head_init(&list); 2839 bh_lock_sock(sk); 2840 2841 /* Try again later if socket is busy */ 2842 if (sock_owned_by_user(sk)) { 2843 sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20); 2844 bh_unlock_sock(sk); 2845 sock_put(sk); 2846 return; 2847 } 2848 2849 if (sk->sk_state == TIPC_ESTABLISHED) 2850 tipc_sk_check_probing_state(sk, &list); 2851 else if (sk->sk_state == TIPC_CONNECTING) 2852 tipc_sk_retry_connect(sk, &list); 2853 2854 bh_unlock_sock(sk); 2855 2856 if (!skb_queue_empty(&list)) 2857 rc = tipc_node_xmit(sock_net(sk), &list, pnode, tsk->portid); 2858 2859 /* SYN messages may cause link congestion */ 2860 if (rc == -ELINKCONG) { 2861 tipc_dest_push(&tsk->cong_links, pnode, 0); 2862 tsk->cong_link_cnt = 1; 2863 } 2864 sock_put(sk); 2865 } 2866 2867 static int tipc_sk_publish(struct 
tipc_sock *tsk, uint scope, 2868 struct tipc_name_seq const *seq) 2869 { 2870 struct sock *sk = &tsk->sk; 2871 struct net *net = sock_net(sk); 2872 struct publication *publ; 2873 u32 key; 2874 2875 if (scope != TIPC_NODE_SCOPE) 2876 scope = TIPC_CLUSTER_SCOPE; 2877 2878 if (tipc_sk_connected(sk)) 2879 return -EINVAL; 2880 key = tsk->portid + tsk->pub_count + 1; 2881 if (key == tsk->portid) 2882 return -EADDRINUSE; 2883 2884 publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper, 2885 scope, tsk->portid, key); 2886 if (unlikely(!publ)) 2887 return -EINVAL; 2888 2889 list_add(&publ->binding_sock, &tsk->publications); 2890 tsk->pub_count++; 2891 tsk->published = 1; 2892 return 0; 2893 } 2894 2895 static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, 2896 struct tipc_name_seq const *seq) 2897 { 2898 struct net *net = sock_net(&tsk->sk); 2899 struct publication *publ; 2900 struct publication *safe; 2901 int rc = -EINVAL; 2902 2903 if (scope != TIPC_NODE_SCOPE) 2904 scope = TIPC_CLUSTER_SCOPE; 2905 2906 list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) { 2907 if (seq) { 2908 if (publ->scope != scope) 2909 continue; 2910 if (publ->type != seq->type) 2911 continue; 2912 if (publ->lower != seq->lower) 2913 continue; 2914 if (publ->upper != seq->upper) 2915 break; 2916 tipc_nametbl_withdraw(net, publ->type, publ->lower, 2917 publ->upper, publ->key); 2918 rc = 0; 2919 break; 2920 } 2921 tipc_nametbl_withdraw(net, publ->type, publ->lower, 2922 publ->upper, publ->key); 2923 rc = 0; 2924 } 2925 if (list_empty(&tsk->publications)) 2926 tsk->published = 0; 2927 return rc; 2928 } 2929 2930 /* tipc_sk_reinit: set non-zero address in all existing sockets 2931 * when we go from standalone to network mode. 
 */
void tipc_sk_reinit(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct rhashtable_iter iter;
	struct tipc_sock *tsk;
	struct tipc_msg *msg;

	rhashtable_walk_enter(&tn->sk_rht, &iter);

	do {
		rhashtable_walk_start(&iter);

		while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
			/* Pin the socket and pause the walk around
			 * lock_sock().
			 */
			sock_hold(&tsk->sk);
			rhashtable_walk_stop(&iter);
			lock_sock(&tsk->sk);
			msg = &tsk->phdr;
			msg_set_prevnode(msg, tipc_own_addr(net));
			msg_set_orignode(msg, tipc_own_addr(net));
			release_sock(&tsk->sk);
			rhashtable_walk_start(&iter);
			sock_put(&tsk->sk);
		}

		rhashtable_walk_stop(&iter);
	} while (tsk == ERR_PTR(-EAGAIN)); /* restart the walk on -EAGAIN */

	rhashtable_walk_exit(&iter);
}

/* tipc_sk_lookup - find socket by port id in the per-net socket hash table
 *
 * Returns the socket with an extra reference taken via sock_hold(), or NULL
 * if no socket has that port id.
 */
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct tipc_sock *tsk;

	rcu_read_lock();
	tsk = rhashtable_lookup(&tn->sk_rht, &portid, tsk_rht_params);
	if (tsk)
		sock_hold(&tsk->sk);
	rcu_read_unlock();

	return tsk;
}

/* tipc_sk_insert - pick a free port id and add socket to the hash table
 *
 * Starts from a random port id and steps through the port range until an
 * insertion succeeds. On success the table's reference on the socket is
 * kept and 0 is returned; returns -1 if every attempt failed.
 */
static int tipc_sk_insert(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1;
	u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT;

	while (remaining--) {
		portid++;
		if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT))
			portid = TIPC_MIN_PORT;
		tsk->portid = portid;
		sock_hold(&tsk->sk);
		if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node,
						   tsk_rht_params))
			return 0;
		/* Insertion failed: give the reference back, try next id */
		sock_put(&tsk->sk);
	}

	return -1;
}

/* tipc_sk_remove - take socket out of the hash table and drop the reference
 * that was taken when it was inserted
 */
static void tipc_sk_remove(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);

	if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) {
		WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}
}

/* Socket hash table layout: entries are keyed by the u32 port id */
static const struct rhashtable_params tsk_rht_params = {
	.nelem_hint = 192,
	.head_offset = offsetof(struct tipc_sock, node),
	.key_offset = offsetof(struct tipc_sock, portid),
	.key_len = sizeof(u32), /* portid */
	.max_size = 1048576,
	.min_size = 256,
	.automatic_shrinking = true,
};

/* tipc_sk_rht_init - set up the socket hash table of a network namespace */
int tipc_sk_rht_init(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	return rhashtable_init(&tn->sk_rht, &tsk_rht_params);
}

/* tipc_sk_rht_destroy - tear down the socket hash table of a namespace */
void tipc_sk_rht_destroy(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	/* Wait for socket readers to complete */
	synchronize_net();

	rhashtable_destroy(&tn->sk_rht);
}

/* tipc_sk_join - make socket a member of a communication group
 *
 * Returns 0 on success; -EACCES for a reserved name type or if the socket
 * already is in a group; -EINVAL for a scope above TIPC_NODE_SCOPE; -ENOMEM
 * if the group could not be created; otherwise the error returned by
 * tipc_sk_publish(), in which case the group is deleted again.
 */
static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
{
	struct net *net = sock_net(&tsk->sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_msg *hdr = &tsk->phdr;
	struct tipc_name_seq seq;
	int rc;

	if (mreq->type < TIPC_RESERVED_TYPES)
		return -EACCES;
	if (mreq->scope > TIPC_NODE_SCOPE)
		return -EINVAL;
	if (grp)
		return -EACCES;
	grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open);
	if (!grp)
		return -ENOMEM;
	tsk->group = grp;
	msg_set_lookup_scope(hdr, mreq->scope);
	msg_set_nametype(hdr, mreq->type);
	msg_set_dest_droppable(hdr, true);
	/* Publish the member identity as a single-instance sequence */
	seq.type = mreq->type;
	seq.lower = mreq->instance;
	seq.upper = seq.lower;
	tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope);
	rc = tipc_sk_publish(tsk, mreq->scope, &seq);
	if (rc) {
		tipc_group_delete(net, grp);
		tsk->group = NULL;
		return rc;
	}
	/* Eliminate any risk that a broadcast overtakes sent JOINs */
	tsk->mc_method.rcast = true;
	tsk->mc_method.mandatory = true;
	tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf);
	return rc;
}

/* tipc_sk_leave - leave the current group and withdraw its publication
 *
 * Returns 0 on success, -EINVAL if the socket is not in a group.
 */
static int tipc_sk_leave(struct tipc_sock *tsk)
{
	struct net *net = sock_net(&tsk->sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_name_seq seq;
	int scope;

	if (!grp)
		return -EINVAL;
	/* Fetch own name and scope before the group goes away */
	tipc_group_self(grp, &seq, &scope);
	tipc_group_delete(net, grp);
	tsk->group = NULL;
	tipc_sk_withdraw(tsk, scope, &seq);
	return 0;
}

/**
 * tipc_setsockopt - set socket option
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: pointer to new option value
 * @ol: length of option value
 *
 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
 * (to ease compatibility).
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
			   sockptr_t ov, unsigned int ol)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group_req mreq;
	u32 value = 0;
	int res = 0;

	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return 0;
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;

	/* Fetch the argument before taking the socket lock */
	switch (opt) {
	case TIPC_IMPORTANCE:
	case TIPC_SRC_DROPPABLE:
	case TIPC_DEST_DROPPABLE:
	case TIPC_CONN_TIMEOUT:
	case TIPC_NODELAY:
		if (ol < sizeof(value))
			return -EINVAL;
		if (copy_from_sockptr(&value, ov, sizeof(u32)))
			return -EFAULT;
		break;
	case TIPC_GROUP_JOIN:
		if (ol < sizeof(mreq))
			return -EINVAL;
		if (copy_from_sockptr(&mreq, ov, sizeof(mreq)))
			return -EFAULT;
		break;
	default:
		/* Remaining options take no argument */
		if (!sockptr_is_null(ov) || ol)
			return -EINVAL;
	}

	lock_sock(sk);

	switch (opt) {
	case TIPC_IMPORTANCE:
		res = tsk_set_importance(sk, value);
		break;
	case TIPC_SRC_DROPPABLE:
		if (sock->type != SOCK_STREAM)
			tsk_set_unreliable(tsk, value);
		else
			res = -ENOPROTOOPT;
		break;
	case TIPC_DEST_DROPPABLE:
		tsk_set_unreturnable(tsk, value);
		break;
	case TIPC_CONN_TIMEOUT:
		/* Stored as given; no range check is applied here */
		tipc_sk(sk)->conn_timeout = value;
		break;
	case TIPC_MCAST_BROADCAST:
		tsk->mc_method.rcast = false;
		tsk->mc_method.mandatory = true;
		break;
	case TIPC_MCAST_REPLICAST:
		tsk->mc_method.rcast = true;
		tsk->mc_method.mandatory = true;
		break;
	case TIPC_GROUP_JOIN:
		res = tipc_sk_join(tsk, &mreq);
		break;
	case TIPC_GROUP_LEAVE:
		res = tipc_sk_leave(tsk);
		break;
	case TIPC_NODELAY:
		tsk->nodelay = !!value;
		tsk_set_nagle(tsk);
		break;
	default:
		res = -EINVAL;
	}

	release_sock(sk);

	return res;
}

/**
 * tipc_getsockopt - get socket option
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: receptacle for option value
 * @ol: receptacle for length of option value
 *
 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
 * (to ease compatibility).
3196 * 3197 * Returns 0 on success, errno otherwise 3198 */ 3199 static int tipc_getsockopt(struct socket *sock, int lvl, int opt, 3200 char __user *ov, int __user *ol) 3201 { 3202 struct sock *sk = sock->sk; 3203 struct tipc_sock *tsk = tipc_sk(sk); 3204 struct tipc_name_seq seq; 3205 int len, scope; 3206 u32 value; 3207 int res; 3208 3209 if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM)) 3210 return put_user(0, ol); 3211 if (lvl != SOL_TIPC) 3212 return -ENOPROTOOPT; 3213 res = get_user(len, ol); 3214 if (res) 3215 return res; 3216 3217 lock_sock(sk); 3218 3219 switch (opt) { 3220 case TIPC_IMPORTANCE: 3221 value = tsk_importance(tsk); 3222 break; 3223 case TIPC_SRC_DROPPABLE: 3224 value = tsk_unreliable(tsk); 3225 break; 3226 case TIPC_DEST_DROPPABLE: 3227 value = tsk_unreturnable(tsk); 3228 break; 3229 case TIPC_CONN_TIMEOUT: 3230 value = tsk->conn_timeout; 3231 /* no need to set "res", since already 0 at this point */ 3232 break; 3233 case TIPC_NODE_RECVQ_DEPTH: 3234 value = 0; /* was tipc_queue_size, now obsolete */ 3235 break; 3236 case TIPC_SOCK_RECVQ_DEPTH: 3237 value = skb_queue_len(&sk->sk_receive_queue); 3238 break; 3239 case TIPC_SOCK_RECVQ_USED: 3240 value = sk_rmem_alloc_get(sk); 3241 break; 3242 case TIPC_GROUP_JOIN: 3243 seq.type = 0; 3244 if (tsk->group) 3245 tipc_group_self(tsk->group, &seq, &scope); 3246 value = seq.type; 3247 break; 3248 default: 3249 res = -EINVAL; 3250 } 3251 3252 release_sock(sk); 3253 3254 if (res) 3255 return res; /* "get" failed */ 3256 3257 if (len < sizeof(value)) 3258 return -EINVAL; 3259 3260 if (copy_to_user(ov, &value, sizeof(value))) 3261 return -EFAULT; 3262 3263 return put_user(sizeof(value), ol); 3264 } 3265 3266 static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3267 { 3268 struct net *net = sock_net(sock->sk); 3269 struct tipc_sioc_nodeid_req nr = {0}; 3270 struct tipc_sioc_ln_req lnr; 3271 void __user *argp = (void __user *)arg; 3272 3273 switch (cmd) { 3274 case 
SIOCGETLINKNAME: 3275 if (copy_from_user(&lnr, argp, sizeof(lnr))) 3276 return -EFAULT; 3277 if (!tipc_node_get_linkname(net, 3278 lnr.bearer_id & 0xffff, lnr.peer, 3279 lnr.linkname, TIPC_MAX_LINK_NAME)) { 3280 if (copy_to_user(argp, &lnr, sizeof(lnr))) 3281 return -EFAULT; 3282 return 0; 3283 } 3284 return -EADDRNOTAVAIL; 3285 case SIOCGETNODEID: 3286 if (copy_from_user(&nr, argp, sizeof(nr))) 3287 return -EFAULT; 3288 if (!tipc_node_get_id(net, nr.peer, nr.node_id)) 3289 return -EADDRNOTAVAIL; 3290 if (copy_to_user(argp, &nr, sizeof(nr))) 3291 return -EFAULT; 3292 return 0; 3293 default: 3294 return -ENOIOCTLCMD; 3295 } 3296 } 3297 3298 static int tipc_socketpair(struct socket *sock1, struct socket *sock2) 3299 { 3300 struct tipc_sock *tsk2 = tipc_sk(sock2->sk); 3301 struct tipc_sock *tsk1 = tipc_sk(sock1->sk); 3302 u32 onode = tipc_own_addr(sock_net(sock1->sk)); 3303 3304 tsk1->peer.family = AF_TIPC; 3305 tsk1->peer.addrtype = TIPC_ADDR_ID; 3306 tsk1->peer.scope = TIPC_NODE_SCOPE; 3307 tsk1->peer.addr.id.ref = tsk2->portid; 3308 tsk1->peer.addr.id.node = onode; 3309 tsk2->peer.family = AF_TIPC; 3310 tsk2->peer.addrtype = TIPC_ADDR_ID; 3311 tsk2->peer.scope = TIPC_NODE_SCOPE; 3312 tsk2->peer.addr.id.ref = tsk1->portid; 3313 tsk2->peer.addr.id.node = onode; 3314 3315 tipc_sk_finish_conn(tsk1, tsk2->portid, onode); 3316 tipc_sk_finish_conn(tsk2, tsk1->portid, onode); 3317 return 0; 3318 } 3319 3320 /* Protocol switches for the various types of TIPC sockets */ 3321 3322 static const struct proto_ops msg_ops = { 3323 .owner = THIS_MODULE, 3324 .family = AF_TIPC, 3325 .release = tipc_release, 3326 .bind = tipc_bind, 3327 .connect = tipc_connect, 3328 .socketpair = tipc_socketpair, 3329 .accept = sock_no_accept, 3330 .getname = tipc_getname, 3331 .poll = tipc_poll, 3332 .ioctl = tipc_ioctl, 3333 .listen = sock_no_listen, 3334 .shutdown = tipc_shutdown, 3335 .setsockopt = tipc_setsockopt, 3336 .getsockopt = tipc_getsockopt, 3337 .sendmsg = tipc_sendmsg, 3338 .recvmsg = 
tipc_recvmsg, 3339 .mmap = sock_no_mmap, 3340 .sendpage = sock_no_sendpage 3341 }; 3342 3343 static const struct proto_ops packet_ops = { 3344 .owner = THIS_MODULE, 3345 .family = AF_TIPC, 3346 .release = tipc_release, 3347 .bind = tipc_bind, 3348 .connect = tipc_connect, 3349 .socketpair = tipc_socketpair, 3350 .accept = tipc_accept, 3351 .getname = tipc_getname, 3352 .poll = tipc_poll, 3353 .ioctl = tipc_ioctl, 3354 .listen = tipc_listen, 3355 .shutdown = tipc_shutdown, 3356 .setsockopt = tipc_setsockopt, 3357 .getsockopt = tipc_getsockopt, 3358 .sendmsg = tipc_send_packet, 3359 .recvmsg = tipc_recvmsg, 3360 .mmap = sock_no_mmap, 3361 .sendpage = sock_no_sendpage 3362 }; 3363 3364 static const struct proto_ops stream_ops = { 3365 .owner = THIS_MODULE, 3366 .family = AF_TIPC, 3367 .release = tipc_release, 3368 .bind = tipc_bind, 3369 .connect = tipc_connect, 3370 .socketpair = tipc_socketpair, 3371 .accept = tipc_accept, 3372 .getname = tipc_getname, 3373 .poll = tipc_poll, 3374 .ioctl = tipc_ioctl, 3375 .listen = tipc_listen, 3376 .shutdown = tipc_shutdown, 3377 .setsockopt = tipc_setsockopt, 3378 .getsockopt = tipc_getsockopt, 3379 .sendmsg = tipc_sendstream, 3380 .recvmsg = tipc_recvstream, 3381 .mmap = sock_no_mmap, 3382 .sendpage = sock_no_sendpage 3383 }; 3384 3385 static const struct net_proto_family tipc_family_ops = { 3386 .owner = THIS_MODULE, 3387 .family = AF_TIPC, 3388 .create = tipc_sk_create 3389 }; 3390 3391 static struct proto tipc_proto = { 3392 .name = "TIPC", 3393 .owner = THIS_MODULE, 3394 .obj_size = sizeof(struct tipc_sock), 3395 .sysctl_rmem = sysctl_tipc_rmem 3396 }; 3397 3398 /** 3399 * tipc_socket_init - initialize TIPC socket interface 3400 * 3401 * Returns 0 on success, errno otherwise 3402 */ 3403 int tipc_socket_init(void) 3404 { 3405 int res; 3406 3407 res = proto_register(&tipc_proto, 1); 3408 if (res) { 3409 pr_err("Failed to register TIPC protocol type\n"); 3410 goto out; 3411 } 3412 3413 res = sock_register(&tipc_family_ops); 
3414 if (res) { 3415 pr_err("Failed to register TIPC socket type\n"); 3416 proto_unregister(&tipc_proto); 3417 goto out; 3418 } 3419 out: 3420 return res; 3421 } 3422 3423 /** 3424 * tipc_socket_stop - stop TIPC socket interface 3425 */ 3426 void tipc_socket_stop(void) 3427 { 3428 sock_unregister(tipc_family_ops.family); 3429 proto_unregister(&tipc_proto); 3430 } 3431 3432 /* Caller should hold socket lock for the passed tipc socket. */ 3433 static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk) 3434 { 3435 u32 peer_node; 3436 u32 peer_port; 3437 struct nlattr *nest; 3438 3439 peer_node = tsk_peer_node(tsk); 3440 peer_port = tsk_peer_port(tsk); 3441 3442 nest = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_CON); 3443 if (!nest) 3444 return -EMSGSIZE; 3445 3446 if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node)) 3447 goto msg_full; 3448 if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port)) 3449 goto msg_full; 3450 3451 if (tsk->conn_type != 0) { 3452 if (nla_put_flag(skb, TIPC_NLA_CON_FLAG)) 3453 goto msg_full; 3454 if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type)) 3455 goto msg_full; 3456 if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance)) 3457 goto msg_full; 3458 } 3459 nla_nest_end(skb, nest); 3460 3461 return 0; 3462 3463 msg_full: 3464 nla_nest_cancel(skb, nest); 3465 3466 return -EMSGSIZE; 3467 } 3468 3469 static int __tipc_nl_add_sk_info(struct sk_buff *skb, struct tipc_sock 3470 *tsk) 3471 { 3472 struct net *net = sock_net(skb->sk); 3473 struct sock *sk = &tsk->sk; 3474 3475 if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid) || 3476 nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr(net))) 3477 return -EMSGSIZE; 3478 3479 if (tipc_sk_connected(sk)) { 3480 if (__tipc_nl_add_sk_con(skb, tsk)) 3481 return -EMSGSIZE; 3482 } else if (!list_empty(&tsk->publications)) { 3483 if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL)) 3484 return -EMSGSIZE; 3485 } 3486 return 0; 3487 } 3488 3489 /* Caller should hold socket lock for the passed 
tipc socket. */ 3490 static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, 3491 struct tipc_sock *tsk) 3492 { 3493 struct nlattr *attrs; 3494 void *hdr; 3495 3496 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3497 &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); 3498 if (!hdr) 3499 goto msg_cancel; 3500 3501 attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK); 3502 if (!attrs) 3503 goto genlmsg_cancel; 3504 3505 if (__tipc_nl_add_sk_info(skb, tsk)) 3506 goto attr_msg_cancel; 3507 3508 nla_nest_end(skb, attrs); 3509 genlmsg_end(skb, hdr); 3510 3511 return 0; 3512 3513 attr_msg_cancel: 3514 nla_nest_cancel(skb, attrs); 3515 genlmsg_cancel: 3516 genlmsg_cancel(skb, hdr); 3517 msg_cancel: 3518 return -EMSGSIZE; 3519 } 3520 3521 int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb, 3522 int (*skb_handler)(struct sk_buff *skb, 3523 struct netlink_callback *cb, 3524 struct tipc_sock *tsk)) 3525 { 3526 struct rhashtable_iter *iter = (void *)cb->args[4]; 3527 struct tipc_sock *tsk; 3528 int err; 3529 3530 rhashtable_walk_start(iter); 3531 while ((tsk = rhashtable_walk_next(iter)) != NULL) { 3532 if (IS_ERR(tsk)) { 3533 err = PTR_ERR(tsk); 3534 if (err == -EAGAIN) { 3535 err = 0; 3536 continue; 3537 } 3538 break; 3539 } 3540 3541 sock_hold(&tsk->sk); 3542 rhashtable_walk_stop(iter); 3543 lock_sock(&tsk->sk); 3544 err = skb_handler(skb, cb, tsk); 3545 if (err) { 3546 release_sock(&tsk->sk); 3547 sock_put(&tsk->sk); 3548 goto out; 3549 } 3550 release_sock(&tsk->sk); 3551 rhashtable_walk_start(iter); 3552 sock_put(&tsk->sk); 3553 } 3554 rhashtable_walk_stop(iter); 3555 out: 3556 return skb->len; 3557 } 3558 EXPORT_SYMBOL(tipc_nl_sk_walk); 3559 3560 int tipc_dump_start(struct netlink_callback *cb) 3561 { 3562 return __tipc_dump_start(cb, sock_net(cb->skb->sk)); 3563 } 3564 EXPORT_SYMBOL(tipc_dump_start); 3565 3566 int __tipc_dump_start(struct netlink_callback *cb, struct net *net) 3567 { 3568 /* 
tipc_nl_name_table_dump() uses cb->args[0...3]. */ 3569 struct rhashtable_iter *iter = (void *)cb->args[4]; 3570 struct tipc_net *tn = tipc_net(net); 3571 3572 if (!iter) { 3573 iter = kmalloc(sizeof(*iter), GFP_KERNEL); 3574 if (!iter) 3575 return -ENOMEM; 3576 3577 cb->args[4] = (long)iter; 3578 } 3579 3580 rhashtable_walk_enter(&tn->sk_rht, iter); 3581 return 0; 3582 } 3583 3584 int tipc_dump_done(struct netlink_callback *cb) 3585 { 3586 struct rhashtable_iter *hti = (void *)cb->args[4]; 3587 3588 rhashtable_walk_exit(hti); 3589 kfree(hti); 3590 return 0; 3591 } 3592 EXPORT_SYMBOL(tipc_dump_done); 3593 3594 int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, 3595 struct tipc_sock *tsk, u32 sk_filter_state, 3596 u64 (*tipc_diag_gen_cookie)(struct sock *sk)) 3597 { 3598 struct sock *sk = &tsk->sk; 3599 struct nlattr *attrs; 3600 struct nlattr *stat; 3601 3602 /*filter response w.r.t sk_state*/ 3603 if (!(sk_filter_state & (1 << sk->sk_state))) 3604 return 0; 3605 3606 attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK); 3607 if (!attrs) 3608 goto msg_cancel; 3609 3610 if (__tipc_nl_add_sk_info(skb, tsk)) 3611 goto attr_msg_cancel; 3612 3613 if (nla_put_u32(skb, TIPC_NLA_SOCK_TYPE, (u32)sk->sk_type) || 3614 nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) || 3615 nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) || 3616 nla_put_u32(skb, TIPC_NLA_SOCK_UID, 3617 from_kuid_munged(sk_user_ns(NETLINK_CB(cb->skb).sk), 3618 sock_i_uid(sk))) || 3619 nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE, 3620 tipc_diag_gen_cookie(sk), 3621 TIPC_NLA_SOCK_PAD)) 3622 goto attr_msg_cancel; 3623 3624 stat = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_STAT); 3625 if (!stat) 3626 goto attr_msg_cancel; 3627 3628 if (nla_put_u32(skb, TIPC_NLA_SOCK_STAT_RCVQ, 3629 skb_queue_len(&sk->sk_receive_queue)) || 3630 nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ, 3631 skb_queue_len(&sk->sk_write_queue)) || 3632 nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP, 3633 
atomic_read(&sk->sk_drops))) 3634 goto stat_msg_cancel; 3635 3636 if (tsk->cong_link_cnt && 3637 nla_put_flag(skb, TIPC_NLA_SOCK_STAT_LINK_CONG)) 3638 goto stat_msg_cancel; 3639 3640 if (tsk_conn_cong(tsk) && 3641 nla_put_flag(skb, TIPC_NLA_SOCK_STAT_CONN_CONG)) 3642 goto stat_msg_cancel; 3643 3644 nla_nest_end(skb, stat); 3645 3646 if (tsk->group) 3647 if (tipc_group_fill_sock_diag(tsk->group, skb)) 3648 goto stat_msg_cancel; 3649 3650 nla_nest_end(skb, attrs); 3651 3652 return 0; 3653 3654 stat_msg_cancel: 3655 nla_nest_cancel(skb, stat); 3656 attr_msg_cancel: 3657 nla_nest_cancel(skb, attrs); 3658 msg_cancel: 3659 return -EMSGSIZE; 3660 } 3661 EXPORT_SYMBOL(tipc_sk_fill_sock_diag); 3662 3663 int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) 3664 { 3665 return tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk); 3666 } 3667 3668 /* Caller should hold socket lock for the passed tipc socket. */ 3669 static int __tipc_nl_add_sk_publ(struct sk_buff *skb, 3670 struct netlink_callback *cb, 3671 struct publication *publ) 3672 { 3673 void *hdr; 3674 struct nlattr *attrs; 3675 3676 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3677 &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); 3678 if (!hdr) 3679 goto msg_cancel; 3680 3681 attrs = nla_nest_start_noflag(skb, TIPC_NLA_PUBL); 3682 if (!attrs) 3683 goto genlmsg_cancel; 3684 3685 if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key)) 3686 goto attr_msg_cancel; 3687 if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type)) 3688 goto attr_msg_cancel; 3689 if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower)) 3690 goto attr_msg_cancel; 3691 if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper)) 3692 goto attr_msg_cancel; 3693 3694 nla_nest_end(skb, attrs); 3695 genlmsg_end(skb, hdr); 3696 3697 return 0; 3698 3699 attr_msg_cancel: 3700 nla_nest_cancel(skb, attrs); 3701 genlmsg_cancel: 3702 genlmsg_cancel(skb, hdr); 3703 msg_cancel: 3704 return -EMSGSIZE; 3705 } 3706 3707 /* Caller should hold 
socket lock for the passed tipc socket. */ 3708 static int __tipc_nl_list_sk_publ(struct sk_buff *skb, 3709 struct netlink_callback *cb, 3710 struct tipc_sock *tsk, u32 *last_publ) 3711 { 3712 int err; 3713 struct publication *p; 3714 3715 if (*last_publ) { 3716 list_for_each_entry(p, &tsk->publications, binding_sock) { 3717 if (p->key == *last_publ) 3718 break; 3719 } 3720 if (p->key != *last_publ) { 3721 /* We never set seq or call nl_dump_check_consistent() 3722 * this means that setting prev_seq here will cause the 3723 * consistence check to fail in the netlink callback 3724 * handler. Resulting in the last NLMSG_DONE message 3725 * having the NLM_F_DUMP_INTR flag set. 3726 */ 3727 cb->prev_seq = 1; 3728 *last_publ = 0; 3729 return -EPIPE; 3730 } 3731 } else { 3732 p = list_first_entry(&tsk->publications, struct publication, 3733 binding_sock); 3734 } 3735 3736 list_for_each_entry_from(p, &tsk->publications, binding_sock) { 3737 err = __tipc_nl_add_sk_publ(skb, cb, p); 3738 if (err) { 3739 *last_publ = p->key; 3740 return err; 3741 } 3742 } 3743 *last_publ = 0; 3744 3745 return 0; 3746 } 3747 3748 int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) 3749 { 3750 int err; 3751 u32 tsk_portid = cb->args[0]; 3752 u32 last_publ = cb->args[1]; 3753 u32 done = cb->args[2]; 3754 struct net *net = sock_net(skb->sk); 3755 struct tipc_sock *tsk; 3756 3757 if (!tsk_portid) { 3758 struct nlattr **attrs = genl_dumpit_info(cb)->attrs; 3759 struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; 3760 3761 if (!attrs[TIPC_NLA_SOCK]) 3762 return -EINVAL; 3763 3764 err = nla_parse_nested_deprecated(sock, TIPC_NLA_SOCK_MAX, 3765 attrs[TIPC_NLA_SOCK], 3766 tipc_nl_sock_policy, NULL); 3767 if (err) 3768 return err; 3769 3770 if (!sock[TIPC_NLA_SOCK_REF]) 3771 return -EINVAL; 3772 3773 tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); 3774 } 3775 3776 if (done) 3777 return 0; 3778 3779 tsk = tipc_sk_lookup(net, tsk_portid); 3780 if (!tsk) 3781 return -EINVAL; 3782 3783 
lock_sock(&tsk->sk); 3784 err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ); 3785 if (!err) 3786 done = 1; 3787 release_sock(&tsk->sk); 3788 sock_put(&tsk->sk); 3789 3790 cb->args[0] = tsk_portid; 3791 cb->args[1] = last_publ; 3792 cb->args[2] = done; 3793 3794 return skb->len; 3795 } 3796 3797 /** 3798 * tipc_sk_filtering - check if a socket should be traced 3799 * @sk: the socket to be examined 3800 * @sysctl_tipc_sk_filter[]: the socket tuple for filtering, 3801 * (portid, sock type, name type, name lower, name upper) 3802 * 3803 * Returns true if the socket meets the socket tuple data 3804 * (value 0 = 'any') or when there is no tuple set (all = 0), 3805 * otherwise false 3806 */ 3807 bool tipc_sk_filtering(struct sock *sk) 3808 { 3809 struct tipc_sock *tsk; 3810 struct publication *p; 3811 u32 _port, _sktype, _type, _lower, _upper; 3812 u32 type = 0, lower = 0, upper = 0; 3813 3814 if (!sk) 3815 return true; 3816 3817 tsk = tipc_sk(sk); 3818 3819 _port = sysctl_tipc_sk_filter[0]; 3820 _sktype = sysctl_tipc_sk_filter[1]; 3821 _type = sysctl_tipc_sk_filter[2]; 3822 _lower = sysctl_tipc_sk_filter[3]; 3823 _upper = sysctl_tipc_sk_filter[4]; 3824 3825 if (!_port && !_sktype && !_type && !_lower && !_upper) 3826 return true; 3827 3828 if (_port) 3829 return (_port == tsk->portid); 3830 3831 if (_sktype && _sktype != sk->sk_type) 3832 return false; 3833 3834 if (tsk->published) { 3835 p = list_first_entry_or_null(&tsk->publications, 3836 struct publication, binding_sock); 3837 if (p) { 3838 type = p->type; 3839 lower = p->lower; 3840 upper = p->upper; 3841 } 3842 } 3843 3844 if (!tipc_sk_type_connectionless(sk)) { 3845 type = tsk->conn_type; 3846 lower = tsk->conn_instance; 3847 upper = tsk->conn_instance; 3848 } 3849 3850 if ((_type && _type != type) || (_lower && _lower != lower) || 3851 (_upper && _upper != upper)) 3852 return false; 3853 3854 return true; 3855 } 3856 3857 u32 tipc_sock_get_portid(struct sock *sk) 3858 { 3859 return (sk) ? 
(tipc_sk(sk))->portid : 0; 3860 } 3861 3862 /** 3863 * tipc_sk_overlimit1 - check if socket rx queue is about to be overloaded, 3864 * both the rcv and backlog queues are considered 3865 * @sk: tipc sk to be checked 3866 * @skb: tipc msg to be checked 3867 * 3868 * Returns true if the socket rx queue allocation is > 90%, otherwise false 3869 */ 3870 3871 bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb) 3872 { 3873 atomic_t *dcnt = &tipc_sk(sk)->dupl_rcvcnt; 3874 unsigned int lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); 3875 unsigned int qsize = sk->sk_backlog.len + sk_rmem_alloc_get(sk); 3876 3877 return (qsize > lim * 90 / 100); 3878 } 3879 3880 /** 3881 * tipc_sk_overlimit2 - check if socket rx queue is about to be overloaded, 3882 * only the rcv queue is considered 3883 * @sk: tipc sk to be checked 3884 * @skb: tipc msg to be checked 3885 * 3886 * Returns true if the socket rx queue allocation is > 90%, otherwise false 3887 */ 3888 3889 bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb) 3890 { 3891 unsigned int lim = rcvbuf_limit(sk, skb); 3892 unsigned int qsize = sk_rmem_alloc_get(sk); 3893 3894 return (qsize > lim * 90 / 100); 3895 } 3896 3897 /** 3898 * tipc_sk_dump - dump TIPC socket 3899 * @sk: tipc sk to be dumped 3900 * @dqueues: bitmask to decide if any socket queue to be dumped? 3901 * - TIPC_DUMP_NONE: don't dump socket queues 3902 * - TIPC_DUMP_SK_SNDQ: dump socket send queue 3903 * - TIPC_DUMP_SK_RCVQ: dump socket rcv queue 3904 * - TIPC_DUMP_SK_BKLGQ: dump socket backlog queue 3905 * - TIPC_DUMP_ALL: dump all the socket queues above 3906 * @buf: returned buffer of dump data in format 3907 */ 3908 int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf) 3909 { 3910 int i = 0; 3911 size_t sz = (dqueues) ? 
SK_LMAX : SK_LMIN; 3912 struct tipc_sock *tsk; 3913 struct publication *p; 3914 bool tsk_connected; 3915 3916 if (!sk) { 3917 i += scnprintf(buf, sz, "sk data: (null)\n"); 3918 return i; 3919 } 3920 3921 tsk = tipc_sk(sk); 3922 tsk_connected = !tipc_sk_type_connectionless(sk); 3923 3924 i += scnprintf(buf, sz, "sk data: %u", sk->sk_type); 3925 i += scnprintf(buf + i, sz - i, " %d", sk->sk_state); 3926 i += scnprintf(buf + i, sz - i, " %x", tsk_own_node(tsk)); 3927 i += scnprintf(buf + i, sz - i, " %u", tsk->portid); 3928 i += scnprintf(buf + i, sz - i, " | %u", tsk_connected); 3929 if (tsk_connected) { 3930 i += scnprintf(buf + i, sz - i, " %x", tsk_peer_node(tsk)); 3931 i += scnprintf(buf + i, sz - i, " %u", tsk_peer_port(tsk)); 3932 i += scnprintf(buf + i, sz - i, " %u", tsk->conn_type); 3933 i += scnprintf(buf + i, sz - i, " %u", tsk->conn_instance); 3934 } 3935 i += scnprintf(buf + i, sz - i, " | %u", tsk->published); 3936 if (tsk->published) { 3937 p = list_first_entry_or_null(&tsk->publications, 3938 struct publication, binding_sock); 3939 i += scnprintf(buf + i, sz - i, " %u", (p) ? p->type : 0); 3940 i += scnprintf(buf + i, sz - i, " %u", (p) ? p->lower : 0); 3941 i += scnprintf(buf + i, sz - i, " %u", (p) ? 
p->upper : 0); 3942 } 3943 i += scnprintf(buf + i, sz - i, " | %u", tsk->snd_win); 3944 i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_win); 3945 i += scnprintf(buf + i, sz - i, " %u", tsk->max_pkt); 3946 i += scnprintf(buf + i, sz - i, " %x", tsk->peer_caps); 3947 i += scnprintf(buf + i, sz - i, " %u", tsk->cong_link_cnt); 3948 i += scnprintf(buf + i, sz - i, " %u", tsk->snt_unacked); 3949 i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_unacked); 3950 i += scnprintf(buf + i, sz - i, " %u", atomic_read(&tsk->dupl_rcvcnt)); 3951 i += scnprintf(buf + i, sz - i, " %u", sk->sk_shutdown); 3952 i += scnprintf(buf + i, sz - i, " | %d", sk_wmem_alloc_get(sk)); 3953 i += scnprintf(buf + i, sz - i, " %d", sk->sk_sndbuf); 3954 i += scnprintf(buf + i, sz - i, " | %d", sk_rmem_alloc_get(sk)); 3955 i += scnprintf(buf + i, sz - i, " %d", sk->sk_rcvbuf); 3956 i += scnprintf(buf + i, sz - i, " | %d\n", READ_ONCE(sk->sk_backlog.len)); 3957 3958 if (dqueues & TIPC_DUMP_SK_SNDQ) { 3959 i += scnprintf(buf + i, sz - i, "sk_write_queue: "); 3960 i += tipc_list_dump(&sk->sk_write_queue, false, buf + i); 3961 } 3962 3963 if (dqueues & TIPC_DUMP_SK_RCVQ) { 3964 i += scnprintf(buf + i, sz - i, "sk_receive_queue: "); 3965 i += tipc_list_dump(&sk->sk_receive_queue, false, buf + i); 3966 } 3967 3968 if (dqueues & TIPC_DUMP_SK_BKLGQ) { 3969 i += scnprintf(buf + i, sz - i, "sk_backlog:\n head "); 3970 i += tipc_skb_dump(sk->sk_backlog.head, false, buf + i); 3971 if (sk->sk_backlog.tail != sk->sk_backlog.head) { 3972 i += scnprintf(buf + i, sz - i, " tail "); 3973 i += tipc_skb_dump(sk->sk_backlog.tail, false, 3974 buf + i); 3975 } 3976 } 3977 3978 return i; 3979 } 3980