recv.c (c74a7469f97c0f40b46e82ee979f9fb1bb6e847c) recv.c (eee2fa6ab3225192d6d894c54a6fb02ac9efdff6)
1/*
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
2 * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or

--- 25 unchanged lines hidden (view full) ---

36#include <linux/in.h>
37#include <linux/export.h>
38#include <linux/time.h>
39#include <linux/rds.h>
40
41#include "rds.h"
42
43void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or

--- 25 unchanged lines hidden (view full) ---

36#include <linux/in.h>
37#include <linux/export.h>
38#include <linux/time.h>
39#include <linux/rds.h>
40
41#include "rds.h"
42
43void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
44 __be32 saddr)
44 struct in6_addr *saddr)
45{
46 int i;
47
48 refcount_set(&inc->i_refcount, 1);
49 INIT_LIST_HEAD(&inc->i_item);
50 inc->i_conn = conn;
45{
46 int i;
47
48 refcount_set(&inc->i_refcount, 1);
49 INIT_LIST_HEAD(&inc->i_item);
50 inc->i_conn = conn;
51 inc->i_saddr = saddr;
51 inc->i_saddr = *saddr;
52 inc->i_rdma_cookie = 0;
53 inc->i_rx_tstamp.tv_sec = 0;
54 inc->i_rx_tstamp.tv_usec = 0;
55
56 for (i = 0; i < RDS_RX_MAX_TRACES; i++)
57 inc->i_rx_lat_trace[i] = 0;
58}
59EXPORT_SYMBOL_GPL(rds_inc_init);
60
61void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp,
52 inc->i_rdma_cookie = 0;
53 inc->i_rx_tstamp.tv_sec = 0;
54 inc->i_rx_tstamp.tv_usec = 0;
55
56 for (i = 0; i < RDS_RX_MAX_TRACES; i++)
57 inc->i_rx_lat_trace[i] = 0;
58}
59EXPORT_SYMBOL_GPL(rds_inc_init);
60
61void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp,
62 __be32 saddr)
62 struct in6_addr *saddr)
63{
64 refcount_set(&inc->i_refcount, 1);
65 INIT_LIST_HEAD(&inc->i_item);
66 inc->i_conn = cp->cp_conn;
67 inc->i_conn_path = cp;
63{
64 refcount_set(&inc->i_refcount, 1);
65 INIT_LIST_HEAD(&inc->i_item);
66 inc->i_conn = cp->cp_conn;
67 inc->i_conn_path = cp;
68 inc->i_saddr = saddr;
68 inc->i_saddr = *saddr;
69 inc->i_rdma_cookie = 0;
70 inc->i_rx_tstamp.tv_sec = 0;
71 inc->i_rx_tstamp.tv_usec = 0;
72}
73EXPORT_SYMBOL_GPL(rds_inc_path_init);
74
75static void rds_inc_addref(struct rds_incoming *inc)
76{

--- 28 unchanged lines hidden (view full) ---

105 rds_stats_add(s_recv_bytes_removed_from_socket, -delta);
106
107 /* loop transport doesn't send/recv congestion updates */
108 if (rs->rs_transport->t_type == RDS_TRANS_LOOP)
109 return;
110
111 now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs);
112
69 inc->i_rdma_cookie = 0;
70 inc->i_rx_tstamp.tv_sec = 0;
71 inc->i_rx_tstamp.tv_usec = 0;
72}
73EXPORT_SYMBOL_GPL(rds_inc_path_init);
74
75static void rds_inc_addref(struct rds_incoming *inc)
76{

--- 28 unchanged lines hidden (view full) ---

105 rds_stats_add(s_recv_bytes_removed_from_socket, -delta);
106
107 /* loop transport doesn't send/recv congestion updates */
108 if (rs->rs_transport->t_type == RDS_TRANS_LOOP)
109 return;
110
111 now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs);
112
113 rdsdebug("rs %p (%pI4:%u) recv bytes %d buf %d "
113 rdsdebug("rs %p (%pI6c:%u) recv bytes %d buf %d "
114 "now_cong %d delta %d\n",
115 rs, &rs->rs_bound_addr,
116 ntohs(rs->rs_bound_port), rs->rs_rcv_bytes,
117 rds_sk_rcvbuf(rs), now_congested, delta);
118
119 /* wasn't -> am congested */
120 if (!rs->rs_congested && now_congested) {
121 rs->rs_congested = 1;

--- 133 unchanged lines hidden (view full) ---

255 * when connection is completed.
256 */
257static void rds_start_mprds(struct rds_connection *conn)
258{
259 int i;
260 struct rds_conn_path *cp;
261
262 if (conn->c_npaths > 1 &&
114 "now_cong %d delta %d\n",
115 rs, &rs->rs_bound_addr,
116 ntohs(rs->rs_bound_port), rs->rs_rcv_bytes,
117 rds_sk_rcvbuf(rs), now_congested, delta);
118
119 /* wasn't -> am congested */
120 if (!rs->rs_congested && now_congested) {
121 rs->rs_congested = 1;

--- 133 unchanged lines hidden (view full) ---

255 * when connection is completed.
256 */
257static void rds_start_mprds(struct rds_connection *conn)
258{
259 int i;
260 struct rds_conn_path *cp;
261
262 if (conn->c_npaths > 1 &&
263 IS_CANONICAL(conn->c_laddr, conn->c_faddr)) {
263 rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) < 0) {
264 for (i = 0; i < conn->c_npaths; i++) {
265 cp = &conn->c_path[i];
266 rds_conn_path_connect_if_down(cp);
267 }
268 }
269}
270
271/*

--- 7 unchanged lines hidden (view full) ---

279 * we save flow lookup and locking for each frag arrival. It does mean
280 * that small messages will wait behind large ones. Fragmenting at all
281 * is only to reduce the memory consumption of pre-posted buffers.
282 *
283 * The caller passes in saddr and daddr instead of us getting it from the
284 * conn. This lets loopback, who only has one conn for both directions,
285 * tell us which roles the addrs in the conn are playing for this message.
286 */
264 for (i = 0; i < conn->c_npaths; i++) {
265 cp = &conn->c_path[i];
266 rds_conn_path_connect_if_down(cp);
267 }
268 }
269}
270
271/*

--- 7 unchanged lines hidden (view full) ---

279 * we save flow lookup and locking for each frag arrival. It does mean
280 * that small messages will wait behind large ones. Fragmenting at all
281 * is only to reduce the memory consumption of pre-posted buffers.
282 *
283 * The caller passes in saddr and daddr instead of us getting it from the
284 * conn. This lets loopback, who only has one conn for both directions,
285 * tell us which roles the addrs in the conn are playing for this message.
286 */
287void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
287void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
288 struct in6_addr *daddr,
288 struct rds_incoming *inc, gfp_t gfp)
289{
290 struct rds_sock *rs = NULL;
291 struct sock *sk;
292 unsigned long flags;
293 struct rds_conn_path *cp;
294
295 inc->i_conn = conn;

--- 38 unchanged lines hidden (view full) ---

334 (inc->i_hdr.h_flags & RDS_FLAG_RETRANSMITTED)) {
335 rds_stats_inc(s_recv_drop_old_seq);
336 goto out;
337 }
338 cp->cp_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1;
339
340 if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) {
341 if (inc->i_hdr.h_sport == 0) {
289 struct rds_incoming *inc, gfp_t gfp)
290{
291 struct rds_sock *rs = NULL;
292 struct sock *sk;
293 unsigned long flags;
294 struct rds_conn_path *cp;
295
296 inc->i_conn = conn;

--- 38 unchanged lines hidden (view full) ---

335 (inc->i_hdr.h_flags & RDS_FLAG_RETRANSMITTED)) {
336 rds_stats_inc(s_recv_drop_old_seq);
337 goto out;
338 }
339 cp->cp_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1;
340
341 if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) {
342 if (inc->i_hdr.h_sport == 0) {
342 rdsdebug("ignore ping with 0 sport from 0x%x\n", saddr);
343 rdsdebug("ignore ping with 0 sport from %pI6c\n",
344 saddr);
343 goto out;
344 }
345 rds_stats_inc(s_recv_ping);
346 rds_send_pong(cp, inc->i_hdr.h_sport);
347 /* if this is a handshake ping, start multipath if necessary */
348 if (RDS_HS_PROBE(be16_to_cpu(inc->i_hdr.h_sport),
349 be16_to_cpu(inc->i_hdr.h_dport))) {
350 rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn);

--- 6 unchanged lines hidden (view full) ---

357 inc->i_hdr.h_sport == 0) {
358 rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn);
359 /* if this is a handshake pong, start multipath if necessary */
360 rds_start_mprds(cp->cp_conn);
361 wake_up(&cp->cp_conn->c_hs_waitq);
362 goto out;
363 }
364
345 goto out;
346 }
347 rds_stats_inc(s_recv_ping);
348 rds_send_pong(cp, inc->i_hdr.h_sport);
349 /* if this is a handshake ping, start multipath if necessary */
350 if (RDS_HS_PROBE(be16_to_cpu(inc->i_hdr.h_sport),
351 be16_to_cpu(inc->i_hdr.h_dport))) {
352 rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn);

--- 6 unchanged lines hidden (view full) ---

359 inc->i_hdr.h_sport == 0) {
360 rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn);
361 /* if this is a handshake pong, start multipath if necessary */
362 rds_start_mprds(cp->cp_conn);
363 wake_up(&cp->cp_conn->c_hs_waitq);
364 goto out;
365 }
366
365 rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
367 rs = rds_find_bound(daddr, inc->i_hdr.h_dport, conn->c_dev_if);
366 if (!rs) {
367 rds_stats_inc(s_recv_drop_no_sock);
368 goto out;
369 }
370
371 /* Process extension headers */
372 rds_recv_incoming_exthdrs(inc, rs);
373

--- 246 unchanged lines hidden (view full) ---

620
621int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
622 int msg_flags)
623{
624 struct sock *sk = sock->sk;
625 struct rds_sock *rs = rds_sk_to_rs(sk);
626 long timeo;
627 int ret = 0, nonblock = msg_flags & MSG_DONTWAIT;
368 if (!rs) {
369 rds_stats_inc(s_recv_drop_no_sock);
370 goto out;
371 }
372
373 /* Process extension headers */
374 rds_recv_incoming_exthdrs(inc, rs);
375

--- 246 unchanged lines hidden (view full) ---

622
623int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
624 int msg_flags)
625{
626 struct sock *sk = sock->sk;
627 struct rds_sock *rs = rds_sk_to_rs(sk);
628 long timeo;
629 int ret = 0, nonblock = msg_flags & MSG_DONTWAIT;
630 DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
628 DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
629 struct rds_incoming *inc = NULL;
630
631 /* udp_recvmsg()->sock_rcvtimeo() gets away without locking too.. */
632 timeo = sock_rcvtimeo(sk, nonblock);
633
634 rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo);
635

--- 32 unchanged lines hidden (view full) ---

668 continue;
669
670 ret = timeo;
671 if (ret == 0)
672 ret = -ETIMEDOUT;
673 break;
674 }
675
631 DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
632 struct rds_incoming *inc = NULL;
633
634 /* udp_recvmsg()->sock_rcvtimeo() gets away without locking too.. */
635 timeo = sock_rcvtimeo(sk, nonblock);
636
637 rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo);
638

--- 32 unchanged lines hidden (view full) ---

671 continue;
672
673 ret = timeo;
674 if (ret == 0)
675 ret = -ETIMEDOUT;
676 break;
677 }
678
676 rdsdebug("copying inc %p from %pI4:%u to user\n", inc,
679 rdsdebug("copying inc %p from %pI6c:%u to user\n", inc,
677 &inc->i_conn->c_faddr,
678 ntohs(inc->i_hdr.h_sport));
679 ret = inc->i_conn->c_trans->inc_copy_to_user(inc, &msg->msg_iter);
680 if (ret < 0)
681 break;
682
683 /*
684 * if the message we just copied isn't at the head of the

--- 17 unchanged lines hidden (view full) ---

702 if (rds_cmsg_recv(inc, msg, rs)) {
703 ret = -EFAULT;
704 goto out;
705 }
706 rds_recvmsg_zcookie(rs, msg);
707
708 rds_stats_inc(s_recv_delivered);
709
680 &inc->i_conn->c_faddr,
681 ntohs(inc->i_hdr.h_sport));
682 ret = inc->i_conn->c_trans->inc_copy_to_user(inc, &msg->msg_iter);
683 if (ret < 0)
684 break;
685
686 /*
687 * if the message we just copied isn't at the head of the

--- 17 unchanged lines hidden (view full) ---

705 if (rds_cmsg_recv(inc, msg, rs)) {
706 ret = -EFAULT;
707 goto out;
708 }
709 rds_recvmsg_zcookie(rs, msg);
710
711 rds_stats_inc(s_recv_delivered);
712
710 if (sin) {
711 sin->sin_family = AF_INET;
712 sin->sin_port = inc->i_hdr.h_sport;
713 sin->sin_addr.s_addr = inc->i_saddr;
714 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
715 msg->msg_namelen = sizeof(*sin);
713 if (msg->msg_name) {
714 if (ipv6_addr_v4mapped(&inc->i_saddr)) {
715 sin = (struct sockaddr_in *)msg->msg_name;
716
717 sin->sin_family = AF_INET;
718 sin->sin_port = inc->i_hdr.h_sport;
719 sin->sin_addr.s_addr =
720 inc->i_saddr.s6_addr32[3];
721 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
722 msg->msg_namelen = sizeof(*sin);
723 } else {
724 sin6 = (struct sockaddr_in6 *)msg->msg_name;
725
726 sin6->sin6_family = AF_INET6;
727 sin6->sin6_port = inc->i_hdr.h_sport;
728 sin6->sin6_addr = inc->i_saddr;
729 sin6->sin6_flowinfo = 0;
730 sin6->sin6_scope_id = rs->rs_bound_scope_id;
731 msg->msg_namelen = sizeof(*sin6);
732 }
716 }
717 break;
718 }
719
720 if (inc)
721 rds_inc_put(inc);
722
723out:

--- 54 unchanged lines hidden ---
733 }
734 break;
735 }
736
737 if (inc)
738 rds_inc_put(inc);
739
740out:

--- 54 unchanged lines hidden ---