1 /* 2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * 4 * CLC (connection layer control) handshake over initial TCP socket to 5 * prepare for RDMA traffic 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/in.h> 13 #include <linux/if_ether.h> 14 #include <net/sock.h> 15 #include <net/tcp.h> 16 17 #include "smc.h" 18 #include "smc_core.h" 19 #include "smc_clc.h" 20 #include "smc_ib.h" 21 22 /* Wait for data on the tcp-socket, analyze received data 23 * Returns: 24 * 0 if success and it was not a decline that we received. 25 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send. 26 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise. 27 */ 28 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, 29 u8 expected_type) 30 { 31 struct sock *clc_sk = smc->clcsock->sk; 32 struct smc_clc_msg_hdr *clcm = buf; 33 struct msghdr msg = {NULL, 0}; 34 int reason_code = 0; 35 struct kvec vec; 36 int len, datlen; 37 int krflags; 38 39 /* peek the first few bytes to determine length of data to receive 40 * so we don't consume any subsequent CLC message or payload data 41 * in the TCP byte stream 42 */ 43 vec.iov_base = buf; 44 vec.iov_len = buflen; 45 krflags = MSG_PEEK | MSG_WAITALL; 46 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; 47 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, 48 sizeof(struct smc_clc_msg_hdr), krflags); 49 if (signal_pending(current)) { 50 reason_code = -EINTR; 51 clc_sk->sk_err = EINTR; 52 smc->sk.sk_err = EINTR; 53 goto out; 54 } 55 if (clc_sk->sk_err) { 56 reason_code = -clc_sk->sk_err; 57 smc->sk.sk_err = clc_sk->sk_err; 58 goto out; 59 } 60 if (!len) { /* peer has performed orderly shutdown */ 61 smc->sk.sk_err = ECONNRESET; 62 reason_code = -ECONNRESET; 63 goto out; 64 } 65 if (len < 0) { 66 smc->sk.sk_err = -len; 67 reason_code = len; 68 goto out; 69 } 70 datlen = ntohs(clcm->length); 71 if ((len < sizeof(struct smc_clc_msg_hdr)) || 72 (datlen < sizeof(struct smc_clc_msg_decline)) || 73 (datlen > sizeof(struct smc_clc_msg_accept_confirm)) || 74 memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) || 75 ((clcm->type != SMC_CLC_DECLINE) && 76 (clcm->type != expected_type))) { 77 smc->sk.sk_err = EPROTO; 78 reason_code = -EPROTO; 79 goto out; 80 } 81 82 /* receive the complete CLC message */ 83 vec.iov_base = buf; 84 vec.iov_len = buflen; 85 memset(&msg, 0, sizeof(struct msghdr)); 86 krflags = MSG_WAITALL; 87 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; 88 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags); 89 if (len < datlen) { 90 smc->sk.sk_err = EPROTO; 91 reason_code = -EPROTO; 92 goto out; 93 } 94 if (clcm->type == SMC_CLC_DECLINE) { 95 reason_code = SMC_CLC_DECL_REPLY; 96 if (ntohl(((struct smc_clc_msg_decline *)buf)->peer_diagnosis) 97 == SMC_CLC_DECL_SYNCERR) 98 smc->conn.lgr->sync_err = true; 99 } 100 101 out: 102 return reason_code; 103 } 104 105 /* send CLC DECLINE message across internal TCP socket */ 106 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, 107 u8 out_of_sync) 108 { 109 struct smc_clc_msg_decline dclc; 110 struct msghdr msg; 111 struct kvec vec; 112 int len; 113 114 memset(&dclc, 0, sizeof(dclc)); 115 memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 116 dclc.hdr.type = SMC_CLC_DECLINE; 117 dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); 118 dclc.hdr.version = SMC_CLC_V1; 119 dclc.hdr.flag = out_of_sync ? 1 : 0; 120 memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); 121 dclc.peer_diagnosis = htonl(peer_diag_info); 122 memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 123 124 memset(&msg, 0, sizeof(msg)); 125 vec.iov_base = &dclc; 126 vec.iov_len = sizeof(struct smc_clc_msg_decline); 127 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, 128 sizeof(struct smc_clc_msg_decline)); 129 if (len < sizeof(struct smc_clc_msg_decline)) 130 smc->sk.sk_err = EPROTO; 131 if (len < 0) 132 smc->sk.sk_err = -len; 133 return len; 134 } 135 136 /* send CLC PROPOSAL message across internal TCP socket */ 137 int smc_clc_send_proposal(struct smc_sock *smc, 138 struct smc_ib_device *smcibdev, 139 u8 ibport) 140 { 141 struct smc_clc_msg_proposal pclc; 142 int reason_code = 0; 143 struct msghdr msg; 144 struct kvec vec; 145 int len, rc; 146 147 /* send SMC Proposal CLC message */ 148 memset(&pclc, 0, sizeof(pclc)); 149 memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 150 pclc.hdr.type = SMC_CLC_PROPOSAL; 151 pclc.hdr.length = htons(sizeof(pclc)); 152 pclc.hdr.version = SMC_CLC_V1; /* SMC version */ 153 memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 154 memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE); 155 memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); 156 157 /* determine subnet and mask from internal TCP socket */ 158 rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet, 159 &pclc.prefix_len); 160 if (rc) 161 return SMC_CLC_DECL_CNFERR; /* configuration error */ 162 memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 163 memset(&msg, 0, sizeof(msg)); 164 vec.iov_base = &pclc; 165 vec.iov_len = sizeof(pclc); 166 /* due to the few bytes needed for clc-handshake this cannot block */ 167 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc)); 168 if (len < sizeof(pclc)) { 169 if (len >= 0) { 170 reason_code = -ENETUNREACH; 171 smc->sk.sk_err = -reason_code; 172 } else { 173 smc->sk.sk_err = smc->clcsock->sk->sk_err; 174 reason_code = -smc->sk.sk_err; 175 } 176 } 177 178 return reason_code; 179 } 180 181 /* send CLC CONFIRM message across internal TCP socket */ 182 int smc_clc_send_confirm(struct smc_sock *smc) 183 { 184 struct smc_connection *conn = &smc->conn; 185 struct smc_clc_msg_accept_confirm cclc; 186 struct smc_link *link; 187 int reason_code = 0; 188 struct msghdr msg; 189 struct kvec vec; 190 int len; 191 192 link = &conn->lgr->lnk[SMC_SINGLE_LINK]; 193 /* send SMC Confirm CLC msg */ 194 memset(&cclc, 0, sizeof(cclc)); 195 memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 196 cclc.hdr.type = SMC_CLC_CONFIRM; 197 cclc.hdr.length = htons(sizeof(cclc)); 198 cclc.hdr.version = SMC_CLC_V1; /* SMC version */ 199 memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 200 memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], 201 SMC_GID_SIZE); 202 memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); 203 hton24(cclc.qpn, link->roce_qp->qp_num); 204 cclc.rmb_rkey = 205 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); 206 cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */ 207 cclc.rmbe_alert_token = htonl(conn->alert_token_local); 208 cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); 209 cclc.rmbe_size = conn->rmbe_size_short; 210 cclc.rmb_dma_addr = 211 cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]); 212 hton24(cclc.psn, link->psn_initial); 213 214 memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 215 216 memset(&msg, 0, sizeof(msg)); 217 vec.iov_base = &cclc; 218 vec.iov_len = sizeof(cclc); 219 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc)); 220 if (len < sizeof(cclc)) { 221 if (len >= 0) { 222 reason_code = -ENETUNREACH; 223 smc->sk.sk_err = -reason_code; 224 } else { 225 smc->sk.sk_err = smc->clcsock->sk->sk_err; 226 reason_code = -smc->sk.sk_err; 227 } 228 } 229 return reason_code; 230 } 231 232 /* send CLC ACCEPT message across internal TCP socket */ 233 int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) 234 { 235 struct smc_connection *conn = &new_smc->conn; 236 struct smc_clc_msg_accept_confirm aclc; 237 struct smc_link *link; 238 struct msghdr msg; 239 struct kvec vec; 240 int rc = 0; 241 int len; 242 243 link = &conn->lgr->lnk[SMC_SINGLE_LINK]; 244 memset(&aclc, 0, sizeof(aclc)); 245 memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 246 aclc.hdr.type = SMC_CLC_ACCEPT; 247 aclc.hdr.length = htons(sizeof(aclc)); 248 aclc.hdr.version = SMC_CLC_V1; /* SMC version */ 249 if (srv_first_contact) 250 aclc.hdr.flag = 1; 251 memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 252 memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], 253 SMC_GID_SIZE); 254 memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); 255 hton24(aclc.qpn, link->roce_qp->qp_num); 256 aclc.rmb_rkey = 257 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); 258 aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */ 259 aclc.rmbe_alert_token = htonl(conn->alert_token_local); 260 aclc.qp_mtu = link->path_mtu; 261 aclc.rmbe_size = conn->rmbe_size_short, 262 aclc.rmb_dma_addr = 263 cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]); 264 hton24(aclc.psn, link->psn_initial); 265 memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 266 267 memset(&msg, 0, sizeof(msg)); 268 vec.iov_base = &aclc; 269 vec.iov_len = sizeof(aclc); 270 len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc)); 271 if (len < sizeof(aclc)) { 272 if (len >= 0) 273 new_smc->sk.sk_err = EPROTO; 274 else 275 new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err; 276 rc = sock_error(&new_smc->sk); 277 } 278 279 return rc; 280 } 281