1 /* 2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * 4 * CLC (connection layer control) handshake over initial TCP socket to 5 * prepare for RDMA traffic 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/in.h> 13 #include <linux/if_ether.h> 14 #include <linux/sched/signal.h> 15 16 #include <net/sock.h> 17 #include <net/tcp.h> 18 19 #include "smc.h" 20 #include "smc_core.h" 21 #include "smc_clc.h" 22 #include "smc_ib.h" 23 24 /* Wait for data on the tcp-socket, analyze received data 25 * Returns: 26 * 0 if success and it was not a decline that we received. 27 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send. 28 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise. 29 */ 30 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, 31 u8 expected_type) 32 { 33 struct sock *clc_sk = smc->clcsock->sk; 34 struct smc_clc_msg_hdr *clcm = buf; 35 struct msghdr msg = {NULL, 0}; 36 int reason_code = 0; 37 struct kvec vec; 38 int len, datlen; 39 int krflags; 40 41 /* peek the first few bytes to determine length of data to receive 42 * so we don't consume any subsequent CLC message or payload data 43 * in the TCP byte stream 44 */ 45 vec.iov_base = buf; 46 vec.iov_len = buflen; 47 krflags = MSG_PEEK | MSG_WAITALL; 48 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; 49 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, 50 sizeof(struct smc_clc_msg_hdr), krflags); 51 if (signal_pending(current)) { 52 reason_code = -EINTR; 53 clc_sk->sk_err = EINTR; 54 smc->sk.sk_err = EINTR; 55 goto out; 56 } 57 if (clc_sk->sk_err) { 58 reason_code = -clc_sk->sk_err; 59 smc->sk.sk_err = clc_sk->sk_err; 60 goto out; 61 } 62 if (!len) { /* peer has performed orderly shutdown */ 63 smc->sk.sk_err = ECONNRESET; 64 reason_code = -ECONNRESET; 65 goto out; 66 } 67 if (len < 0) { 68 smc->sk.sk_err = -len; 69 reason_code = len; 70 goto out; 71 } 72 datlen = ntohs(clcm->length); 73 if ((len < sizeof(struct smc_clc_msg_hdr)) || 74 (datlen < sizeof(struct smc_clc_msg_decline)) || 75 (datlen > sizeof(struct smc_clc_msg_accept_confirm)) || 76 memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) || 77 ((clcm->type != SMC_CLC_DECLINE) && 78 (clcm->type != expected_type))) { 79 smc->sk.sk_err = EPROTO; 80 reason_code = -EPROTO; 81 goto out; 82 } 83 84 /* receive the complete CLC message */ 85 vec.iov_base = buf; 86 vec.iov_len = buflen; 87 memset(&msg, 0, sizeof(struct msghdr)); 88 krflags = MSG_WAITALL; 89 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; 90 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags); 91 if (len < datlen) { 92 smc->sk.sk_err = EPROTO; 93 reason_code = -EPROTO; 94 goto out; 95 } 96 if (clcm->type == SMC_CLC_DECLINE) { 97 reason_code = SMC_CLC_DECL_REPLY; 98 if (ntohl(((struct smc_clc_msg_decline *)buf)->peer_diagnosis) 99 == SMC_CLC_DECL_SYNCERR) 100 smc->conn.lgr->sync_err = true; 101 } 102 103 out: 104 return reason_code; 105 } 106 107 /* send CLC DECLINE message across internal TCP socket */ 108 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, 109 u8 out_of_sync) 110 { 111 struct smc_clc_msg_decline dclc; 112 struct msghdr msg; 113 struct kvec vec; 114 int len; 115 116 memset(&dclc, 0, sizeof(dclc)); 117 memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 118 dclc.hdr.type = SMC_CLC_DECLINE; 119 dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); 120 dclc.hdr.version = SMC_CLC_V1; 121 dclc.hdr.flag = out_of_sync ? 1 : 0; 122 memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); 123 dclc.peer_diagnosis = htonl(peer_diag_info); 124 memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 125 126 memset(&msg, 0, sizeof(msg)); 127 vec.iov_base = &dclc; 128 vec.iov_len = sizeof(struct smc_clc_msg_decline); 129 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, 130 sizeof(struct smc_clc_msg_decline)); 131 if (len < sizeof(struct smc_clc_msg_decline)) 132 smc->sk.sk_err = EPROTO; 133 if (len < 0) 134 smc->sk.sk_err = -len; 135 return len; 136 } 137 138 /* send CLC PROPOSAL message across internal TCP socket */ 139 int smc_clc_send_proposal(struct smc_sock *smc, 140 struct smc_ib_device *smcibdev, 141 u8 ibport) 142 { 143 struct smc_clc_msg_proposal pclc; 144 int reason_code = 0; 145 struct msghdr msg; 146 struct kvec vec; 147 int len, rc; 148 149 /* send SMC Proposal CLC message */ 150 memset(&pclc, 0, sizeof(pclc)); 151 memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 152 pclc.hdr.type = SMC_CLC_PROPOSAL; 153 pclc.hdr.length = htons(sizeof(pclc)); 154 pclc.hdr.version = SMC_CLC_V1; /* SMC version */ 155 memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 156 memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE); 157 memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); 158 159 /* determine subnet and mask from internal TCP socket */ 160 rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet, 161 &pclc.prefix_len); 162 if (rc) 163 return SMC_CLC_DECL_CNFERR; /* configuration error */ 164 memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 165 memset(&msg, 0, sizeof(msg)); 166 vec.iov_base = &pclc; 167 vec.iov_len = sizeof(pclc); 168 /* due to the few bytes needed for clc-handshake this cannot block */ 169 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc)); 170 if (len < sizeof(pclc)) { 171 if (len >= 0) { 172 reason_code = -ENETUNREACH; 173 smc->sk.sk_err = -reason_code; 174 } else { 175 smc->sk.sk_err = smc->clcsock->sk->sk_err; 176 reason_code = -smc->sk.sk_err; 177 } 178 } 179 180 return reason_code; 181 } 182 183 /* send CLC CONFIRM message across internal TCP socket */ 184 int smc_clc_send_confirm(struct smc_sock *smc) 185 { 186 struct smc_connection *conn = &smc->conn; 187 struct smc_clc_msg_accept_confirm cclc; 188 struct smc_link *link; 189 int reason_code = 0; 190 struct msghdr msg; 191 struct kvec vec; 192 int len; 193 194 link = &conn->lgr->lnk[SMC_SINGLE_LINK]; 195 /* send SMC Confirm CLC msg */ 196 memset(&cclc, 0, sizeof(cclc)); 197 memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 198 cclc.hdr.type = SMC_CLC_CONFIRM; 199 cclc.hdr.length = htons(sizeof(cclc)); 200 cclc.hdr.version = SMC_CLC_V1; /* SMC version */ 201 memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 202 memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], 203 SMC_GID_SIZE); 204 memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); 205 hton24(cclc.qpn, link->roce_qp->qp_num); 206 cclc.rmb_rkey = 207 htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]); 208 cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */ 209 cclc.rmbe_alert_token = htonl(conn->alert_token_local); 210 cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); 211 cclc.rmbe_size = conn->rmbe_size_short; 212 cclc.rmb_dma_addr = 213 cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]); 214 hton24(cclc.psn, link->psn_initial); 215 216 memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 217 218 memset(&msg, 0, sizeof(msg)); 219 vec.iov_base = &cclc; 220 vec.iov_len = sizeof(cclc); 221 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc)); 222 if (len < sizeof(cclc)) { 223 if (len >= 0) { 224 reason_code = -ENETUNREACH; 225 smc->sk.sk_err = -reason_code; 226 } else { 227 smc->sk.sk_err = smc->clcsock->sk->sk_err; 228 reason_code = -smc->sk.sk_err; 229 } 230 } 231 return reason_code; 232 } 233 234 /* send CLC ACCEPT message across internal TCP socket */ 235 int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) 236 { 237 struct smc_connection *conn = &new_smc->conn; 238 struct smc_clc_msg_accept_confirm aclc; 239 struct smc_link *link; 240 struct msghdr msg; 241 struct kvec vec; 242 int rc = 0; 243 int len; 244 245 link = &conn->lgr->lnk[SMC_SINGLE_LINK]; 246 memset(&aclc, 0, sizeof(aclc)); 247 memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 248 aclc.hdr.type = SMC_CLC_ACCEPT; 249 aclc.hdr.length = htons(sizeof(aclc)); 250 aclc.hdr.version = SMC_CLC_V1; /* SMC version */ 251 if (srv_first_contact) 252 aclc.hdr.flag = 1; 253 memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 254 memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], 255 SMC_GID_SIZE); 256 memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); 257 hton24(aclc.qpn, link->roce_qp->qp_num); 258 aclc.rmb_rkey = 259 htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]); 260 aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */ 261 aclc.rmbe_alert_token = htonl(conn->alert_token_local); 262 aclc.qp_mtu = link->path_mtu; 263 aclc.rmbe_size = conn->rmbe_size_short, 264 aclc.rmb_dma_addr = 265 cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]); 266 hton24(aclc.psn, link->psn_initial); 267 memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 268 269 memset(&msg, 0, sizeof(msg)); 270 vec.iov_base = &aclc; 271 vec.iov_len = sizeof(aclc); 272 len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc)); 273 if (len < sizeof(aclc)) { 274 if (len >= 0) 275 new_smc->sk.sk_err = EPROTO; 276 else 277 new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err; 278 rc = sock_error(&new_smc->sk); 279 } 280 281 return rc; 282 } 283