1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * CLC (connection layer control) handshake over initial TCP socket to 6 * prepare for RDMA traffic 7 * 8 * Copyright IBM Corp. 2016 9 * 10 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 11 */ 12 13 #include <linux/in.h> 14 #include <linux/if_ether.h> 15 #include <linux/sched/signal.h> 16 17 #include <net/sock.h> 18 #include <net/tcp.h> 19 20 #include "smc.h" 21 #include "smc_core.h" 22 #include "smc_clc.h" 23 #include "smc_ib.h" 24 25 /* check if received message has a correct header length and contains valid 26 * heading and trailing eyecatchers 27 */ 28 static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) 29 { 30 struct smc_clc_msg_proposal_prefix *pclc_prfx; 31 struct smc_clc_msg_accept_confirm *clc; 32 struct smc_clc_msg_proposal *pclc; 33 struct smc_clc_msg_decline *dclc; 34 struct smc_clc_msg_trail *trl; 35 36 if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) 37 return false; 38 switch (clcm->type) { 39 case SMC_CLC_PROPOSAL: 40 pclc = (struct smc_clc_msg_proposal *)clcm; 41 pclc_prfx = smc_clc_proposal_get_prefix(pclc); 42 if (ntohs(pclc->hdr.length) != 43 sizeof(*pclc) + ntohs(pclc->iparea_offset) + 44 sizeof(*pclc_prfx) + 45 pclc_prfx->ipv6_prefixes_cnt * 46 sizeof(struct smc_clc_ipv6_prefix) + 47 sizeof(*trl)) 48 return false; 49 trl = (struct smc_clc_msg_trail *) 50 ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl)); 51 break; 52 case SMC_CLC_ACCEPT: 53 case SMC_CLC_CONFIRM: 54 clc = (struct smc_clc_msg_accept_confirm *)clcm; 55 if (ntohs(clc->hdr.length) != sizeof(*clc)) 56 return false; 57 trl = &clc->trl; 58 break; 59 case SMC_CLC_DECLINE: 60 dclc = (struct smc_clc_msg_decline *)clcm; 61 if (ntohs(dclc->hdr.length) != sizeof(*dclc)) 62 return false; 63 trl = &dclc->trl; 64 break; 65 default: 66 return false; 67 } 68 if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) 69 return false; 70 return true; 71 } 72 73 /* Wait for data on the tcp-socket, analyze received data 74 * Returns: 75 * 0 if success and it was not a decline that we received. 76 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send. 77 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise. 78 */ 79 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, 80 u8 expected_type) 81 { 82 struct sock *clc_sk = smc->clcsock->sk; 83 struct smc_clc_msg_hdr *clcm = buf; 84 struct msghdr msg = {NULL, 0}; 85 int reason_code = 0; 86 struct kvec vec = {buf, buflen}; 87 int len, datlen; 88 int krflags; 89 90 /* peek the first few bytes to determine length of data to receive 91 * so we don't consume any subsequent CLC message or payload data 92 * in the TCP byte stream 93 */ 94 /* 95 * Caller must make sure that buflen is no less than 96 * sizeof(struct smc_clc_msg_hdr) 97 */ 98 krflags = MSG_PEEK | MSG_WAITALL; 99 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; 100 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, 101 sizeof(struct smc_clc_msg_hdr)); 102 len = sock_recvmsg(smc->clcsock, &msg, krflags); 103 if (signal_pending(current)) { 104 reason_code = -EINTR; 105 clc_sk->sk_err = EINTR; 106 smc->sk.sk_err = EINTR; 107 goto out; 108 } 109 if (clc_sk->sk_err) { 110 reason_code = -clc_sk->sk_err; 111 smc->sk.sk_err = clc_sk->sk_err; 112 goto out; 113 } 114 if (!len) { /* peer has performed orderly shutdown */ 115 smc->sk.sk_err = ECONNRESET; 116 reason_code = -ECONNRESET; 117 goto out; 118 } 119 if (len < 0) { 120 smc->sk.sk_err = -len; 121 reason_code = len; 122 goto out; 123 } 124 datlen = ntohs(clcm->length); 125 if ((len < sizeof(struct smc_clc_msg_hdr)) || 126 (datlen > buflen) || 127 ((clcm->type != SMC_CLC_DECLINE) && 128 (clcm->type != expected_type))) { 129 smc->sk.sk_err = EPROTO; 130 reason_code = -EPROTO; 131 goto out; 132 } 133 134 /* receive the complete CLC message */ 135 memset(&msg, 0, sizeof(struct msghdr)); 136 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, buflen); 137 krflags = MSG_WAITALL; 138 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; 139 len = sock_recvmsg(smc->clcsock, &msg, krflags); 140 if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) { 141 smc->sk.sk_err = EPROTO; 142 reason_code = -EPROTO; 143 goto out; 144 } 145 if (clcm->type == SMC_CLC_DECLINE) { 146 reason_code = SMC_CLC_DECL_REPLY; 147 if (((struct smc_clc_msg_decline *)buf)->hdr.flag) { 148 smc->conn.lgr->sync_err = true; 149 smc_lgr_terminate(smc->conn.lgr); 150 } 151 } 152 153 out: 154 return reason_code; 155 } 156 157 /* send CLC DECLINE message across internal TCP socket */ 158 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) 159 { 160 struct smc_clc_msg_decline dclc; 161 struct msghdr msg; 162 struct kvec vec; 163 int len; 164 165 memset(&dclc, 0, sizeof(dclc)); 166 memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 167 dclc.hdr.type = SMC_CLC_DECLINE; 168 dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); 169 dclc.hdr.version = SMC_CLC_V1; 170 dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0; 171 memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); 172 dclc.peer_diagnosis = htonl(peer_diag_info); 173 memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 174 175 memset(&msg, 0, sizeof(msg)); 176 vec.iov_base = &dclc; 177 vec.iov_len = sizeof(struct smc_clc_msg_decline); 178 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, 179 sizeof(struct smc_clc_msg_decline)); 180 if (len < sizeof(struct smc_clc_msg_decline)) 181 smc->sk.sk_err = EPROTO; 182 if (len < 0) 183 smc->sk.sk_err = -len; 184 return sock_error(&smc->sk); 185 } 186 187 /* send CLC PROPOSAL message across internal TCP socket */ 188 int smc_clc_send_proposal(struct smc_sock *smc, 189 struct smc_ib_device *smcibdev, 190 u8 ibport) 191 { 192 struct smc_clc_msg_proposal_prefix pclc_prfx; 193 struct smc_clc_msg_proposal pclc; 194 struct smc_clc_msg_trail trl; 195 int reason_code = 0; 196 struct kvec vec[3]; 197 struct msghdr msg; 198 int len, plen, rc; 199 200 /* send SMC Proposal CLC message */ 201 plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl); 202 memset(&pclc, 0, sizeof(pclc)); 203 memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 204 pclc.hdr.type = SMC_CLC_PROPOSAL; 205 pclc.hdr.length = htons(plen); 206 pclc.hdr.version = SMC_CLC_V1; /* SMC version */ 207 memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 208 memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE); 209 memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); 210 pclc.iparea_offset = htons(0); 211 212 memset(&pclc_prfx, 0, sizeof(pclc_prfx)); 213 /* determine subnet and mask from internal TCP socket */ 214 rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet, 215 &pclc_prfx.prefix_len); 216 if (rc) 217 return SMC_CLC_DECL_CNFERR; /* configuration error */ 218 pclc_prfx.ipv6_prefixes_cnt = 0; 219 memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 220 memset(&msg, 0, sizeof(msg)); 221 vec[0].iov_base = &pclc; 222 vec[0].iov_len = sizeof(pclc); 223 vec[1].iov_base = &pclc_prfx; 224 vec[1].iov_len = sizeof(pclc_prfx); 225 vec[2].iov_base = &trl; 226 vec[2].iov_len = sizeof(trl); 227 /* due to the few bytes needed for clc-handshake this cannot block */ 228 len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen); 229 if (len < sizeof(pclc)) { 230 if (len >= 0) { 231 reason_code = -ENETUNREACH; 232 smc->sk.sk_err = -reason_code; 233 } else { 234 smc->sk.sk_err = smc->clcsock->sk->sk_err; 235 reason_code = -smc->sk.sk_err; 236 } 237 } 238 239 return reason_code; 240 } 241 242 /* send CLC CONFIRM message across internal TCP socket */ 243 int smc_clc_send_confirm(struct smc_sock *smc) 244 { 245 struct smc_connection *conn = &smc->conn; 246 struct smc_clc_msg_accept_confirm cclc; 247 struct smc_link *link; 248 int reason_code = 0; 249 struct msghdr msg; 250 struct kvec vec; 251 int len; 252 253 link = &conn->lgr->lnk[SMC_SINGLE_LINK]; 254 /* send SMC Confirm CLC msg */ 255 memset(&cclc, 0, sizeof(cclc)); 256 memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 257 cclc.hdr.type = SMC_CLC_CONFIRM; 258 cclc.hdr.length = htons(sizeof(cclc)); 259 cclc.hdr.version = SMC_CLC_V1; /* SMC version */ 260 memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 261 memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], 262 SMC_GID_SIZE); 263 memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); 264 hton24(cclc.qpn, link->roce_qp->qp_num); 265 cclc.rmb_rkey = 266 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); 267 cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */ 268 cclc.rmbe_alert_token = htonl(conn->alert_token_local); 269 cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); 270 cclc.rmbe_size = conn->rmbe_size_short; 271 cclc.rmb_dma_addr = cpu_to_be64( 272 (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); 273 hton24(cclc.psn, link->psn_initial); 274 275 memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 276 277 memset(&msg, 0, sizeof(msg)); 278 vec.iov_base = &cclc; 279 vec.iov_len = sizeof(cclc); 280 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc)); 281 if (len < sizeof(cclc)) { 282 if (len >= 0) { 283 reason_code = -ENETUNREACH; 284 smc->sk.sk_err = -reason_code; 285 } else { 286 smc->sk.sk_err = smc->clcsock->sk->sk_err; 287 reason_code = -smc->sk.sk_err; 288 } 289 } 290 return reason_code; 291 } 292 293 /* send CLC ACCEPT message across internal TCP socket */ 294 int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) 295 { 296 struct smc_connection *conn = &new_smc->conn; 297 struct smc_clc_msg_accept_confirm aclc; 298 struct smc_link *link; 299 struct msghdr msg; 300 struct kvec vec; 301 int rc = 0; 302 int len; 303 304 link = &conn->lgr->lnk[SMC_SINGLE_LINK]; 305 memset(&aclc, 0, sizeof(aclc)); 306 memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 307 aclc.hdr.type = SMC_CLC_ACCEPT; 308 aclc.hdr.length = htons(sizeof(aclc)); 309 aclc.hdr.version = SMC_CLC_V1; /* SMC version */ 310 if (srv_first_contact) 311 aclc.hdr.flag = 1; 312 memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 313 memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], 314 SMC_GID_SIZE); 315 memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); 316 hton24(aclc.qpn, link->roce_qp->qp_num); 317 aclc.rmb_rkey = 318 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); 319 aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */ 320 aclc.rmbe_alert_token = htonl(conn->alert_token_local); 321 aclc.qp_mtu = link->path_mtu; 322 aclc.rmbe_size = conn->rmbe_size_short, 323 aclc.rmb_dma_addr = cpu_to_be64( 324 (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); 325 hton24(aclc.psn, link->psn_initial); 326 memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 327 328 memset(&msg, 0, sizeof(msg)); 329 vec.iov_base = &aclc; 330 vec.iov_len = sizeof(aclc); 331 len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc)); 332 if (len < sizeof(aclc)) { 333 if (len >= 0) 334 new_smc->sk.sk_err = EPROTO; 335 else 336 new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err; 337 rc = sock_error(&new_smc->sk); 338 } 339 340 return rc; 341 } 342