1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * CLC (connection layer control) handshake over initial TCP socket to 6 * prepare for RDMA traffic 7 * 8 * Copyright IBM Corp. 2016, 2018 9 * 10 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 11 */ 12 13 #include <linux/in.h> 14 #include <linux/inetdevice.h> 15 #include <linux/if_ether.h> 16 #include <linux/sched/signal.h> 17 #include <linux/utsname.h> 18 #include <linux/ctype.h> 19 20 #include <net/addrconf.h> 21 #include <net/sock.h> 22 #include <net/tcp.h> 23 24 #include "smc.h" 25 #include "smc_core.h" 26 #include "smc_clc.h" 27 #include "smc_ib.h" 28 #include "smc_ism.h" 29 #include "smc_netlink.h" 30 31 #define SMCR_CLC_ACCEPT_CONFIRM_LEN 68 32 #define SMCD_CLC_ACCEPT_CONFIRM_LEN 48 33 #define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78 34 #define SMC_CLC_RECV_BUF_LEN 100 35 36 /* eye catcher "SMCR" EBCDIC for CLC messages */ 37 static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; 38 /* eye catcher "SMCD" EBCDIC for CLC messages */ 39 static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'}; 40 41 static u8 smc_hostname[SMC_MAX_HOSTNAME_LEN]; 42 43 struct smc_clc_eid_table { 44 rwlock_t lock; 45 struct list_head list; 46 u8 ueid_cnt; 47 u8 seid_enabled; 48 }; 49 50 static struct smc_clc_eid_table smc_clc_eid_table; 51 52 struct smc_clc_eid_entry { 53 struct list_head list; 54 u8 eid[SMC_MAX_EID_LEN]; 55 }; 56 57 /* The size of a user EID is 32 characters. 58 * Valid characters should be (single-byte character set) A-Z, 0-9, '.' and '-'. 59 * Blanks should only be used to pad to the expected size. 60 * First character must be alphanumeric. 61 */ 62 static bool smc_clc_ueid_valid(char *ueid) 63 { 64 char *end = ueid + SMC_MAX_EID_LEN; 65 66 while (--end >= ueid && isspace(*end)) 67 ; 68 if (end < ueid) 69 return false; 70 if (!isalnum(*ueid) || islower(*ueid)) 71 return false; 72 while (ueid <= end) { 73 if ((!isalnum(*ueid) || islower(*ueid)) && *ueid != '.' && 74 *ueid != '-') 75 return false; 76 ueid++; 77 } 78 return true; 79 } 80 81 static int smc_clc_ueid_add(char *ueid) 82 { 83 struct smc_clc_eid_entry *new_ueid, *tmp_ueid; 84 int rc; 85 86 if (!smc_clc_ueid_valid(ueid)) 87 return -EINVAL; 88 89 /* add a new ueid entry to the ueid table if there isn't one */ 90 new_ueid = kzalloc(sizeof(*new_ueid), GFP_KERNEL); 91 if (!new_ueid) 92 return -ENOMEM; 93 memcpy(new_ueid->eid, ueid, SMC_MAX_EID_LEN); 94 95 write_lock(&smc_clc_eid_table.lock); 96 if (smc_clc_eid_table.ueid_cnt >= SMC_MAX_UEID) { 97 rc = -ERANGE; 98 goto err_out; 99 } 100 list_for_each_entry(tmp_ueid, &smc_clc_eid_table.list, list) { 101 if (!memcmp(tmp_ueid->eid, ueid, SMC_MAX_EID_LEN)) { 102 rc = -EEXIST; 103 goto err_out; 104 } 105 } 106 list_add_tail(&new_ueid->list, &smc_clc_eid_table.list); 107 smc_clc_eid_table.ueid_cnt++; 108 write_unlock(&smc_clc_eid_table.lock); 109 return 0; 110 111 err_out: 112 write_unlock(&smc_clc_eid_table.lock); 113 kfree(new_ueid); 114 return rc; 115 } 116 117 int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info) 118 { 119 struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY]; 120 char *ueid; 121 122 if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1) 123 return -EINVAL; 124 ueid = (char *)nla_data(nla_ueid); 125 126 return smc_clc_ueid_add(ueid); 127 } 128 129 /* remove one or all ueid entries from the table */ 130 static int smc_clc_ueid_remove(char *ueid) 131 { 132 struct smc_clc_eid_entry *lst_ueid, *tmp_ueid; 133 int rc = -ENOENT; 134 135 /* remove table entry */ 136 write_lock(&smc_clc_eid_table.lock); 137 list_for_each_entry_safe(lst_ueid, tmp_ueid, &smc_clc_eid_table.list, 138 list) { 139 if (!ueid || !memcmp(lst_ueid->eid, ueid, SMC_MAX_EID_LEN)) { 140 list_del(&lst_ueid->list); 141 smc_clc_eid_table.ueid_cnt--; 142 kfree(lst_ueid); 143 rc = 0; 144 } 145 } 146 if (!rc && !smc_clc_eid_table.ueid_cnt) { 147 smc_clc_eid_table.seid_enabled = 1; 148 rc = -EAGAIN; /* indicate success and enabling of seid */ 149 } 150 write_unlock(&smc_clc_eid_table.lock); 151 return rc; 152 } 153 154 int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info) 155 { 156 struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY]; 157 char *ueid; 158 159 if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1) 160 return -EINVAL; 161 ueid = (char *)nla_data(nla_ueid); 162 163 return smc_clc_ueid_remove(ueid); 164 } 165 166 int smc_nl_flush_ueid(struct sk_buff *skb, struct genl_info *info) 167 { 168 smc_clc_ueid_remove(NULL); 169 return 0; 170 } 171 172 static int smc_nl_ueid_dumpinfo(struct sk_buff *skb, u32 portid, u32 seq, 173 u32 flags, char *ueid) 174 { 175 char ueid_str[SMC_MAX_EID_LEN + 1]; 176 void *hdr; 177 178 hdr = genlmsg_put(skb, portid, seq, &smc_gen_nl_family, 179 flags, SMC_NETLINK_DUMP_UEID); 180 if (!hdr) 181 return -ENOMEM; 182 snprintf(ueid_str, sizeof(ueid_str), "%s", ueid); 183 if (nla_put_string(skb, SMC_NLA_EID_TABLE_ENTRY, ueid_str)) { 184 genlmsg_cancel(skb, hdr); 185 return -EMSGSIZE; 186 } 187 genlmsg_end(skb, hdr); 188 return 0; 189 } 190 191 static int _smc_nl_ueid_dump(struct sk_buff *skb, u32 portid, u32 seq, 192 int start_idx) 193 { 194 struct smc_clc_eid_entry *lst_ueid; 195 int idx = 0; 196 197 read_lock(&smc_clc_eid_table.lock); 198 list_for_each_entry(lst_ueid, &smc_clc_eid_table.list, list) { 199 if (idx++ < start_idx) 200 continue; 201 if (smc_nl_ueid_dumpinfo(skb, portid, seq, NLM_F_MULTI, 202 lst_ueid->eid)) { 203 --idx; 204 break; 205 } 206 } 207 read_unlock(&smc_clc_eid_table.lock); 208 return idx; 209 } 210 211 int smc_nl_dump_ueid(struct sk_buff *skb, struct netlink_callback *cb) 212 { 213 struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); 214 int idx; 215 216 idx = _smc_nl_ueid_dump(skb, NETLINK_CB(cb->skb).portid, 217 cb->nlh->nlmsg_seq, cb_ctx->pos[0]); 218 219 cb_ctx->pos[0] = idx; 220 return skb->len; 221 } 222 223 int smc_nl_dump_seid(struct sk_buff *skb, struct netlink_callback *cb) 224 { 225 struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); 226 char seid_str[SMC_MAX_EID_LEN + 1]; 227 u8 seid_enabled; 228 void *hdr; 229 u8 *seid; 230 231 if (cb_ctx->pos[0]) 232 return skb->len; 233 234 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 235 &smc_gen_nl_family, NLM_F_MULTI, 236 SMC_NETLINK_DUMP_SEID); 237 if (!hdr) 238 return -ENOMEM; 239 if (!smc_ism_is_v2_capable()) 240 goto end; 241 242 smc_ism_get_system_eid(&seid); 243 snprintf(seid_str, sizeof(seid_str), "%s", seid); 244 if (nla_put_string(skb, SMC_NLA_SEID_ENTRY, seid_str)) 245 goto err; 246 read_lock(&smc_clc_eid_table.lock); 247 seid_enabled = smc_clc_eid_table.seid_enabled; 248 read_unlock(&smc_clc_eid_table.lock); 249 if (nla_put_u8(skb, SMC_NLA_SEID_ENABLED, seid_enabled)) 250 goto err; 251 end: 252 genlmsg_end(skb, hdr); 253 cb_ctx->pos[0]++; 254 return skb->len; 255 err: 256 genlmsg_cancel(skb, hdr); 257 return -EMSGSIZE; 258 } 259 260 int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info) 261 { 262 write_lock(&smc_clc_eid_table.lock); 263 smc_clc_eid_table.seid_enabled = 1; 264 write_unlock(&smc_clc_eid_table.lock); 265 return 0; 266 } 267 268 int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info) 269 { 270 int rc = 0; 271 272 write_lock(&smc_clc_eid_table.lock); 273 if (!smc_clc_eid_table.ueid_cnt) 274 rc = -ENOENT; 275 else 276 smc_clc_eid_table.seid_enabled = 0; 277 write_unlock(&smc_clc_eid_table.lock); 278 return rc; 279 } 280 281 static bool _smc_clc_match_ueid(u8 *peer_ueid) 282 { 283 struct smc_clc_eid_entry *tmp_ueid; 284 285 list_for_each_entry(tmp_ueid, &smc_clc_eid_table.list, list) { 286 if (!memcmp(tmp_ueid->eid, peer_ueid, SMC_MAX_EID_LEN)) 287 return true; 288 } 289 return false; 290 } 291 292 bool smc_clc_match_eid(u8 *negotiated_eid, 293 struct smc_clc_v2_extension *smc_v2_ext, 294 u8 *peer_eid, u8 *local_eid) 295 { 296 bool match = false; 297 int i; 298 299 negotiated_eid[0] = 0; 300 read_lock(&smc_clc_eid_table.lock); 301 if (smc_clc_eid_table.seid_enabled && 302 smc_v2_ext->hdr.flag.seid && 303 !memcmp(peer_eid, local_eid, SMC_MAX_EID_LEN)) { 304 memcpy(negotiated_eid, peer_eid, SMC_MAX_EID_LEN); 305 match = true; 306 goto out; 307 } 308 309 for (i = 0; i < smc_v2_ext->hdr.eid_cnt; i++) { 310 if (_smc_clc_match_ueid(smc_v2_ext->user_eids[i])) { 311 memcpy(negotiated_eid, smc_v2_ext->user_eids[i], 312 SMC_MAX_EID_LEN); 313 match = true; 314 goto out; 315 } 316 } 317 out: 318 read_unlock(&smc_clc_eid_table.lock); 319 return match; 320 } 321 322 /* check arriving CLC proposal */ 323 static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc) 324 { 325 struct smc_clc_msg_proposal_prefix *pclc_prfx; 326 struct smc_clc_smcd_v2_extension *smcd_v2_ext; 327 struct smc_clc_msg_hdr *hdr = &pclc->hdr; 328 struct smc_clc_v2_extension *v2_ext; 329 330 v2_ext = smc_get_clc_v2_ext(pclc); 331 pclc_prfx = smc_clc_proposal_get_prefix(pclc); 332 if (hdr->version == SMC_V1) { 333 if (hdr->typev1 == SMC_TYPE_N) 334 return false; 335 if (ntohs(hdr->length) != 336 sizeof(*pclc) + ntohs(pclc->iparea_offset) + 337 sizeof(*pclc_prfx) + 338 pclc_prfx->ipv6_prefixes_cnt * 339 sizeof(struct smc_clc_ipv6_prefix) + 340 sizeof(struct smc_clc_msg_trail)) 341 return false; 342 } else { 343 if (ntohs(hdr->length) != 344 sizeof(*pclc) + 345 sizeof(struct smc_clc_msg_smcd) + 346 (hdr->typev1 != SMC_TYPE_N ? 347 sizeof(*pclc_prfx) + 348 pclc_prfx->ipv6_prefixes_cnt * 349 sizeof(struct smc_clc_ipv6_prefix) : 0) + 350 (hdr->typev2 != SMC_TYPE_N ? 351 sizeof(*v2_ext) + 352 v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN : 0) + 353 (smcd_indicated(hdr->typev2) ? 354 sizeof(*smcd_v2_ext) + v2_ext->hdr.ism_gid_cnt * 355 sizeof(struct smc_clc_smcd_gid_chid) : 356 0) + 357 sizeof(struct smc_clc_msg_trail)) 358 return false; 359 } 360 return true; 361 } 362 363 /* check arriving CLC accept or confirm */ 364 static bool 365 smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2) 366 { 367 struct smc_clc_msg_hdr *hdr = &clc_v2->hdr; 368 369 if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D) 370 return false; 371 if (hdr->version == SMC_V1) { 372 if ((hdr->typev1 == SMC_TYPE_R && 373 ntohs(hdr->length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) || 374 (hdr->typev1 == SMC_TYPE_D && 375 ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN)) 376 return false; 377 } else { 378 if (hdr->typev1 == SMC_TYPE_D && 379 ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 && 380 (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 + 381 sizeof(struct smc_clc_first_contact_ext))) 382 return false; 383 } 384 return true; 385 } 386 387 static void smc_clc_fill_fce(struct smc_clc_first_contact_ext *fce, int *len) 388 { 389 memset(fce, 0, sizeof(*fce)); 390 fce->os_type = SMC_CLC_OS_LINUX; 391 fce->release = SMC_RELEASE; 392 memcpy(fce->hostname, smc_hostname, sizeof(smc_hostname)); 393 (*len) += sizeof(*fce); 394 } 395 396 /* check if received message has a correct header length and contains valid 397 * heading and trailing eyecatchers 398 */ 399 static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) 400 { 401 struct smc_clc_msg_accept_confirm_v2 *clc_v2; 402 struct smc_clc_msg_proposal *pclc; 403 struct smc_clc_msg_decline *dclc; 404 struct smc_clc_msg_trail *trl; 405 406 if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && 407 memcmp(clcm->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER))) 408 return false; 409 switch (clcm->type) { 410 case SMC_CLC_PROPOSAL: 411 pclc = (struct smc_clc_msg_proposal *)clcm; 412 if (!smc_clc_msg_prop_valid(pclc)) 413 return false; 414 trl = (struct smc_clc_msg_trail *) 415 ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl)); 416 break; 417 case SMC_CLC_ACCEPT: 418 case SMC_CLC_CONFIRM: 419 clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)clcm; 420 if (!smc_clc_msg_acc_conf_valid(clc_v2)) 421 return false; 422 trl = (struct smc_clc_msg_trail *) 423 ((u8 *)clc_v2 + ntohs(clc_v2->hdr.length) - 424 sizeof(*trl)); 425 break; 426 case SMC_CLC_DECLINE: 427 dclc = (struct smc_clc_msg_decline *)clcm; 428 if (ntohs(dclc->hdr.length) != sizeof(*dclc)) 429 return false; 430 trl = &dclc->trl; 431 break; 432 default: 433 return false; 434 } 435 if (check_trl && 436 memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && 437 memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER))) 438 return false; 439 return true; 440 } 441 442 /* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */ 443 static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4, 444 struct smc_clc_msg_proposal_prefix *prop) 445 { 446 struct in_device *in_dev = __in_dev_get_rcu(dst->dev); 447 const struct in_ifaddr *ifa; 448 449 if (!in_dev) 450 return -ENODEV; 451 452 in_dev_for_each_ifa_rcu(ifa, in_dev) { 453 if (!inet_ifa_match(ipv4, ifa)) 454 continue; 455 prop->prefix_len = inet_mask_len(ifa->ifa_mask); 456 prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask; 457 /* prop->ipv6_prefixes_cnt = 0; already done by memset before */ 458 return 0; 459 } 460 return -ENOENT; 461 } 462 463 /* fill CLC proposal msg with ipv6 prefixes from device */ 464 static int smc_clc_prfx_set6_rcu(struct dst_entry *dst, 465 struct smc_clc_msg_proposal_prefix *prop, 466 struct smc_clc_ipv6_prefix *ipv6_prfx) 467 { 468 #if IS_ENABLED(CONFIG_IPV6) 469 struct inet6_dev *in6_dev = __in6_dev_get(dst->dev); 470 struct inet6_ifaddr *ifa; 471 int cnt = 0; 472 473 if (!in6_dev) 474 return -ENODEV; 475 /* use a maximum of 8 IPv6 prefixes from device */ 476 list_for_each_entry(ifa, &in6_dev->addr_list, if_list) { 477 if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL) 478 continue; 479 ipv6_addr_prefix(&ipv6_prfx[cnt].prefix, 480 &ifa->addr, ifa->prefix_len); 481 ipv6_prfx[cnt].prefix_len = ifa->prefix_len; 482 cnt++; 483 if (cnt == SMC_CLC_MAX_V6_PREFIX) 484 break; 485 } 486 prop->ipv6_prefixes_cnt = cnt; 487 if (cnt) 488 return 0; 489 #endif 490 return -ENOENT; 491 } 492 493 /* retrieve and set prefixes in CLC proposal msg */ 494 static int smc_clc_prfx_set(struct socket *clcsock, 495 struct smc_clc_msg_proposal_prefix *prop, 496 struct smc_clc_ipv6_prefix *ipv6_prfx) 497 { 498 struct dst_entry *dst = sk_dst_get(clcsock->sk); 499 struct sockaddr_storage addrs; 500 struct sockaddr_in6 *addr6; 501 struct sockaddr_in *addr; 502 int rc = -ENOENT; 503 504 if (!dst) { 505 rc = -ENOTCONN; 506 goto out; 507 } 508 if (!dst->dev) { 509 rc = -ENODEV; 510 goto out_rel; 511 } 512 /* get address to which the internal TCP socket is bound */ 513 kernel_getsockname(clcsock, (struct sockaddr *)&addrs); 514 /* analyze IP specific data of net_device belonging to TCP socket */ 515 addr6 = (struct sockaddr_in6 *)&addrs; 516 rcu_read_lock(); 517 if (addrs.ss_family == PF_INET) { 518 /* IPv4 */ 519 addr = (struct sockaddr_in *)&addrs; 520 rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop); 521 } else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) { 522 /* mapped IPv4 address - peer is IPv4 only */ 523 rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3], 524 prop); 525 } else { 526 /* IPv6 */ 527 rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx); 528 } 529 rcu_read_unlock(); 530 out_rel: 531 dst_release(dst); 532 out: 533 return rc; 534 } 535 536 /* match ipv4 addrs of dev against addr in CLC proposal */ 537 static int smc_clc_prfx_match4_rcu(struct net_device *dev, 538 struct smc_clc_msg_proposal_prefix *prop) 539 { 540 struct in_device *in_dev = __in_dev_get_rcu(dev); 541 const struct in_ifaddr *ifa; 542 543 if (!in_dev) 544 return -ENODEV; 545 in_dev_for_each_ifa_rcu(ifa, in_dev) { 546 if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) && 547 inet_ifa_match(prop->outgoing_subnet, ifa)) 548 return 0; 549 } 550 551 return -ENOENT; 552 } 553 554 /* match ipv6 addrs of dev against addrs in CLC proposal */ 555 static int smc_clc_prfx_match6_rcu(struct net_device *dev, 556 struct smc_clc_msg_proposal_prefix *prop) 557 { 558 #if IS_ENABLED(CONFIG_IPV6) 559 struct inet6_dev *in6_dev = __in6_dev_get(dev); 560 struct smc_clc_ipv6_prefix *ipv6_prfx; 561 struct inet6_ifaddr *ifa; 562 int i, max; 563 564 if (!in6_dev) 565 return -ENODEV; 566 /* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */ 567 ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop)); 568 max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX); 569 list_for_each_entry(ifa, &in6_dev->addr_list, if_list) { 570 if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL) 571 continue; 572 for (i = 0; i < max; i++) { 573 if (ifa->prefix_len == ipv6_prfx[i].prefix_len && 574 ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix, 575 ifa->prefix_len)) 576 return 0; 577 } 578 } 579 #endif 580 return -ENOENT; 581 } 582 583 /* check if proposed prefixes match one of our device prefixes */ 584 int smc_clc_prfx_match(struct socket *clcsock, 585 struct smc_clc_msg_proposal_prefix *prop) 586 { 587 struct dst_entry *dst = sk_dst_get(clcsock->sk); 588 int rc; 589 590 if (!dst) { 591 rc = -ENOTCONN; 592 goto out; 593 } 594 if (!dst->dev) { 595 rc = -ENODEV; 596 goto out_rel; 597 } 598 rcu_read_lock(); 599 if (!prop->ipv6_prefixes_cnt) 600 rc = smc_clc_prfx_match4_rcu(dst->dev, prop); 601 else 602 rc = smc_clc_prfx_match6_rcu(dst->dev, prop); 603 rcu_read_unlock(); 604 out_rel: 605 dst_release(dst); 606 out: 607 return rc; 608 } 609 610 /* Wait for data on the tcp-socket, analyze received data 611 * Returns: 612 * 0 if success and it was not a decline that we received. 613 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send. 614 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise. 615 */ 616 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, 617 u8 expected_type, unsigned long timeout) 618 { 619 long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo; 620 struct sock *clc_sk = smc->clcsock->sk; 621 struct smc_clc_msg_hdr *clcm = buf; 622 struct msghdr msg = {NULL, 0}; 623 int reason_code = 0; 624 struct kvec vec = {buf, buflen}; 625 int len, datlen, recvlen; 626 bool check_trl = true; 627 int krflags; 628 629 /* peek the first few bytes to determine length of data to receive 630 * so we don't consume any subsequent CLC message or payload data 631 * in the TCP byte stream 632 */ 633 /* 634 * Caller must make sure that buflen is no less than 635 * sizeof(struct smc_clc_msg_hdr) 636 */ 637 krflags = MSG_PEEK | MSG_WAITALL; 638 clc_sk->sk_rcvtimeo = timeout; 639 iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, 640 sizeof(struct smc_clc_msg_hdr)); 641 len = sock_recvmsg(smc->clcsock, &msg, krflags); 642 if (signal_pending(current)) { 643 reason_code = -EINTR; 644 clc_sk->sk_err = EINTR; 645 smc->sk.sk_err = EINTR; 646 goto out; 647 } 648 if (clc_sk->sk_err) { 649 reason_code = -clc_sk->sk_err; 650 if (clc_sk->sk_err == EAGAIN && 651 expected_type == SMC_CLC_DECLINE) 652 clc_sk->sk_err = 0; /* reset for fallback usage */ 653 else 654 smc->sk.sk_err = clc_sk->sk_err; 655 goto out; 656 } 657 if (!len) { /* peer has performed orderly shutdown */ 658 smc->sk.sk_err = ECONNRESET; 659 reason_code = -ECONNRESET; 660 goto out; 661 } 662 if (len < 0) { 663 if (len != -EAGAIN || expected_type != SMC_CLC_DECLINE) 664 smc->sk.sk_err = -len; 665 reason_code = len; 666 goto out; 667 } 668 datlen = ntohs(clcm->length); 669 if ((len < sizeof(struct smc_clc_msg_hdr)) || 670 (clcm->version < SMC_V1) || 671 ((clcm->type != SMC_CLC_DECLINE) && 672 (clcm->type != expected_type))) { 673 smc->sk.sk_err = EPROTO; 674 reason_code = -EPROTO; 675 goto out; 676 } 677 678 /* receive the complete CLC message */ 679 memset(&msg, 0, sizeof(struct msghdr)); 680 if (datlen > buflen) { 681 check_trl = false; 682 recvlen = buflen; 683 } else { 684 recvlen = datlen; 685 } 686 iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen); 687 krflags = MSG_WAITALL; 688 len = sock_recvmsg(smc->clcsock, &msg, krflags); 689 if (len < recvlen || !smc_clc_msg_hdr_valid(clcm, check_trl)) { 690 smc->sk.sk_err = EPROTO; 691 reason_code = -EPROTO; 692 goto out; 693 } 694 datlen -= len; 695 while (datlen) { 696 u8 tmp[SMC_CLC_RECV_BUF_LEN]; 697 698 vec.iov_base = &tmp; 699 vec.iov_len = SMC_CLC_RECV_BUF_LEN; 700 /* receive remaining proposal message */ 701 recvlen = datlen > SMC_CLC_RECV_BUF_LEN ? 702 SMC_CLC_RECV_BUF_LEN : datlen; 703 iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen); 704 len = sock_recvmsg(smc->clcsock, &msg, krflags); 705 datlen -= len; 706 } 707 if (clcm->type == SMC_CLC_DECLINE) { 708 struct smc_clc_msg_decline *dclc; 709 710 dclc = (struct smc_clc_msg_decline *)clcm; 711 reason_code = SMC_CLC_DECL_PEERDECL; 712 smc->peer_diagnosis = ntohl(dclc->peer_diagnosis); 713 if (((struct smc_clc_msg_decline *)buf)->hdr.typev2 & 714 SMC_FIRST_CONTACT_MASK) { 715 smc->conn.lgr->sync_err = 1; 716 smc_lgr_terminate_sched(smc->conn.lgr); 717 } 718 } 719 720 out: 721 clc_sk->sk_rcvtimeo = rcvtimeo; 722 return reason_code; 723 } 724 725 /* send CLC DECLINE message across internal TCP socket */ 726 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version) 727 { 728 struct smc_clc_msg_decline dclc; 729 struct msghdr msg; 730 struct kvec vec; 731 int len; 732 733 memset(&dclc, 0, sizeof(dclc)); 734 memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 735 dclc.hdr.type = SMC_CLC_DECLINE; 736 dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); 737 dclc.hdr.version = version; 738 dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX; 739 dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 740 SMC_FIRST_CONTACT_MASK : 0; 741 if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) && 742 smc_ib_is_valid_local_systemid()) 743 memcpy(dclc.id_for_peer, local_systemid, 744 sizeof(local_systemid)); 745 dclc.peer_diagnosis = htonl(peer_diag_info); 746 memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 747 748 memset(&msg, 0, sizeof(msg)); 749 vec.iov_base = &dclc; 750 vec.iov_len = sizeof(struct smc_clc_msg_decline); 751 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, 752 sizeof(struct smc_clc_msg_decline)); 753 if (len < 0 || len < sizeof(struct smc_clc_msg_decline)) 754 len = -EPROTO; 755 return len > 0 ? 0 : len; 756 } 757 758 /* send CLC PROPOSAL message across internal TCP socket */ 759 int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) 760 { 761 struct smc_clc_smcd_v2_extension *smcd_v2_ext; 762 struct smc_clc_msg_proposal_prefix *pclc_prfx; 763 struct smc_clc_msg_proposal *pclc_base; 764 struct smc_clc_smcd_gid_chid *gidchids; 765 struct smc_clc_msg_proposal_area *pclc; 766 struct smc_clc_ipv6_prefix *ipv6_prfx; 767 struct smc_clc_v2_extension *v2_ext; 768 struct smc_clc_msg_smcd *pclc_smcd; 769 struct smc_clc_msg_trail *trl; 770 int len, i, plen, rc; 771 int reason_code = 0; 772 struct kvec vec[8]; 773 struct msghdr msg; 774 775 pclc = kzalloc(sizeof(*pclc), GFP_KERNEL); 776 if (!pclc) 777 return -ENOMEM; 778 779 pclc_base = &pclc->pclc_base; 780 pclc_smcd = &pclc->pclc_smcd; 781 pclc_prfx = &pclc->pclc_prfx; 782 ipv6_prfx = pclc->pclc_prfx_ipv6; 783 v2_ext = &pclc->pclc_v2_ext; 784 smcd_v2_ext = &pclc->pclc_smcd_v2_ext; 785 gidchids = pclc->pclc_gidchids; 786 trl = &pclc->pclc_trl; 787 788 pclc_base->hdr.version = SMC_V2; 789 pclc_base->hdr.typev1 = ini->smc_type_v1; 790 pclc_base->hdr.typev2 = ini->smc_type_v2; 791 plen = sizeof(*pclc_base) + sizeof(*pclc_smcd) + sizeof(*trl); 792 793 /* retrieve ip prefixes for CLC proposal msg */ 794 if (ini->smc_type_v1 != SMC_TYPE_N) { 795 rc = smc_clc_prfx_set(smc->clcsock, pclc_prfx, ipv6_prfx); 796 if (rc) { 797 if (ini->smc_type_v2 == SMC_TYPE_N) { 798 kfree(pclc); 799 return SMC_CLC_DECL_CNFERR; 800 } 801 pclc_base->hdr.typev1 = SMC_TYPE_N; 802 } else { 803 pclc_base->iparea_offset = htons(sizeof(*pclc_smcd)); 804 plen += sizeof(*pclc_prfx) + 805 pclc_prfx->ipv6_prefixes_cnt * 806 sizeof(ipv6_prfx[0]); 807 } 808 } 809 810 /* build SMC Proposal CLC message */ 811 memcpy(pclc_base->hdr.eyecatcher, SMC_EYECATCHER, 812 sizeof(SMC_EYECATCHER)); 813 pclc_base->hdr.type = SMC_CLC_PROPOSAL; 814 if (smcr_indicated(ini->smc_type_v1)) { 815 /* add SMC-R specifics */ 816 memcpy(pclc_base->lcl.id_for_peer, local_systemid, 817 sizeof(local_systemid)); 818 memcpy(pclc_base->lcl.gid, ini->ib_gid, SMC_GID_SIZE); 819 memcpy(pclc_base->lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1], 820 ETH_ALEN); 821 } 822 if (smcd_indicated(ini->smc_type_v1)) { 823 /* add SMC-D specifics */ 824 if (ini->ism_dev[0]) { 825 pclc_smcd->ism.gid = htonll(ini->ism_dev[0]->local_gid); 826 pclc_smcd->ism.chid = 827 htons(smc_ism_get_chid(ini->ism_dev[0])); 828 } 829 } 830 if (ini->smc_type_v2 == SMC_TYPE_N) { 831 pclc_smcd->v2_ext_offset = 0; 832 } else { 833 struct smc_clc_eid_entry *ueident; 834 u16 v2_ext_offset; 835 u8 *eid = NULL; 836 837 v2_ext_offset = sizeof(*pclc_smcd) - 838 offsetofend(struct smc_clc_msg_smcd, v2_ext_offset); 839 if (ini->smc_type_v1 != SMC_TYPE_N) 840 v2_ext_offset += sizeof(*pclc_prfx) + 841 pclc_prfx->ipv6_prefixes_cnt * 842 sizeof(ipv6_prfx[0]); 843 pclc_smcd->v2_ext_offset = htons(v2_ext_offset); 844 845 read_lock(&smc_clc_eid_table.lock); 846 v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt; 847 plen += smc_clc_eid_table.ueid_cnt * SMC_MAX_EID_LEN; 848 i = 0; 849 list_for_each_entry(ueident, &smc_clc_eid_table.list, list) { 850 memcpy(v2_ext->user_eids[i++], ueident->eid, 851 sizeof(ueident->eid)); 852 } 853 v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled; 854 read_unlock(&smc_clc_eid_table.lock); 855 v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt; 856 v2_ext->hdr.flag.release = SMC_RELEASE; 857 v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) - 858 offsetofend(struct smc_clnt_opts_area_hdr, 859 smcd_v2_ext_offset) + 860 v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN); 861 smc_ism_get_system_eid(&eid); 862 if (eid && v2_ext->hdr.flag.seid) 863 memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN); 864 plen += sizeof(*v2_ext) + sizeof(*smcd_v2_ext); 865 if (ini->ism_offered_cnt) { 866 for (i = 1; i <= ini->ism_offered_cnt; i++) { 867 gidchids[i - 1].gid = 868 htonll(ini->ism_dev[i]->local_gid); 869 gidchids[i - 1].chid = 870 htons(smc_ism_get_chid(ini->ism_dev[i])); 871 } 872 plen += ini->ism_offered_cnt * 873 sizeof(struct smc_clc_smcd_gid_chid); 874 } 875 } 876 pclc_base->hdr.length = htons(plen); 877 memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 878 879 /* send SMC Proposal CLC message */ 880 memset(&msg, 0, sizeof(msg)); 881 i = 0; 882 vec[i].iov_base = pclc_base; 883 vec[i++].iov_len = sizeof(*pclc_base); 884 vec[i].iov_base = pclc_smcd; 885 vec[i++].iov_len = sizeof(*pclc_smcd); 886 if (ini->smc_type_v1 != SMC_TYPE_N) { 887 vec[i].iov_base = pclc_prfx; 888 vec[i++].iov_len = sizeof(*pclc_prfx); 889 if (pclc_prfx->ipv6_prefixes_cnt > 0) { 890 vec[i].iov_base = ipv6_prfx; 891 vec[i++].iov_len = pclc_prfx->ipv6_prefixes_cnt * 892 sizeof(ipv6_prfx[0]); 893 } 894 } 895 if (ini->smc_type_v2 != SMC_TYPE_N) { 896 vec[i].iov_base = v2_ext; 897 vec[i++].iov_len = sizeof(*v2_ext) + 898 (v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN); 899 vec[i].iov_base = smcd_v2_ext; 900 vec[i++].iov_len = sizeof(*smcd_v2_ext); 901 if (ini->ism_offered_cnt) { 902 vec[i].iov_base = gidchids; 903 vec[i++].iov_len = ini->ism_offered_cnt * 904 sizeof(struct smc_clc_smcd_gid_chid); 905 } 906 } 907 vec[i].iov_base = trl; 908 vec[i++].iov_len = sizeof(*trl); 909 /* due to the few bytes needed for clc-handshake this cannot block */ 910 len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen); 911 if (len < 0) { 912 smc->sk.sk_err = smc->clcsock->sk->sk_err; 913 reason_code = -smc->sk.sk_err; 914 } else if (len < ntohs(pclc_base->hdr.length)) { 915 reason_code = -ENETUNREACH; 916 smc->sk.sk_err = -reason_code; 917 } 918 919 kfree(pclc); 920 return reason_code; 921 } 922 923 /* build and send CLC CONFIRM / ACCEPT message */ 924 static int smc_clc_send_confirm_accept(struct smc_sock *smc, 925 struct smc_clc_msg_accept_confirm_v2 *clc_v2, 926 int first_contact, u8 version, 927 u8 *eid) 928 { 929 struct smc_connection *conn = &smc->conn; 930 struct smc_clc_msg_accept_confirm *clc; 931 struct smc_clc_first_contact_ext fce; 932 struct smc_clc_msg_trail trl; 933 struct kvec vec[3]; 934 struct msghdr msg; 935 int i, len; 936 937 /* send SMC Confirm CLC msg */ 938 clc = (struct smc_clc_msg_accept_confirm *)clc_v2; 939 clc->hdr.version = version; /* SMC version */ 940 if (first_contact) 941 clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK; 942 if (conn->lgr->is_smcd) { 943 /* SMC-D specific settings */ 944 memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER, 945 sizeof(SMCD_EYECATCHER)); 946 clc->hdr.typev1 = SMC_TYPE_D; 947 clc->d0.gid = conn->lgr->smcd->local_gid; 948 clc->d0.token = conn->rmb_desc->token; 949 clc->d0.dmbe_size = conn->rmbe_size_short; 950 clc->d0.dmbe_idx = 0; 951 memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); 952 if (version == SMC_V1) { 953 clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); 954 } else { 955 clc_v2->chid = htons(smc_ism_get_chid(conn->lgr->smcd)); 956 if (eid[0]) 957 memcpy(clc_v2->eid, eid, SMC_MAX_EID_LEN); 958 len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; 959 if (first_contact) 960 smc_clc_fill_fce(&fce, &len); 961 clc_v2->hdr.length = htons(len); 962 } 963 memcpy(trl.eyecatcher, SMCD_EYECATCHER, 964 sizeof(SMCD_EYECATCHER)); 965 } else { 966 struct smc_link *link = conn->lnk; 967 968 /* SMC-R specific settings */ 969 link = conn->lnk; 970 memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER, 971 sizeof(SMC_EYECATCHER)); 972 clc->hdr.typev1 = SMC_TYPE_R; 973 clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); 974 memcpy(clc->r0.lcl.id_for_peer, local_systemid, 975 sizeof(local_systemid)); 976 memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE); 977 memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1], 978 ETH_ALEN); 979 hton24(clc->r0.qpn, link->roce_qp->qp_num); 980 clc->r0.rmb_rkey = 981 htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey); 982 clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ 983 clc->r0.rmbe_alert_token = htonl(conn->alert_token_local); 984 switch (clc->hdr.type) { 985 case SMC_CLC_ACCEPT: 986 clc->r0.qp_mtu = link->path_mtu; 987 break; 988 case SMC_CLC_CONFIRM: 989 clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu); 990 break; 991 } 992 clc->r0.rmbe_size = conn->rmbe_size_short; 993 clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address 994 (conn->rmb_desc->sgt[link->link_idx].sgl)); 995 hton24(clc->r0.psn, link->psn_initial); 996 memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 997 } 998 999 memset(&msg, 0, sizeof(msg)); 1000 i = 0; 1001 vec[i].iov_base = clc_v2; 1002 if (version > SMC_V1) 1003 vec[i++].iov_len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 - sizeof(trl); 1004 else 1005 vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? 1006 SMCD_CLC_ACCEPT_CONFIRM_LEN : 1007 SMCR_CLC_ACCEPT_CONFIRM_LEN) - 1008 sizeof(trl); 1009 if (version > SMC_V1 && first_contact) { 1010 vec[i].iov_base = &fce; 1011 vec[i++].iov_len = sizeof(fce); 1012 } 1013 vec[i].iov_base = &trl; 1014 vec[i++].iov_len = sizeof(trl); 1015 return kernel_sendmsg(smc->clcsock, &msg, vec, 1, 1016 ntohs(clc->hdr.length)); 1017 } 1018 1019 /* send CLC CONFIRM message across internal TCP socket */ 1020 int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, 1021 u8 version, u8 *eid) 1022 { 1023 struct smc_clc_msg_accept_confirm_v2 cclc_v2; 1024 int reason_code = 0; 1025 int len; 1026 1027 /* send SMC Confirm CLC msg */ 1028 memset(&cclc_v2, 0, sizeof(cclc_v2)); 1029 cclc_v2.hdr.type = SMC_CLC_CONFIRM; 1030 len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact, 1031 version, eid); 1032 if (len < ntohs(cclc_v2.hdr.length)) { 1033 if (len >= 0) { 1034 reason_code = -ENETUNREACH; 1035 smc->sk.sk_err = -reason_code; 1036 } else { 1037 smc->sk.sk_err = smc->clcsock->sk->sk_err; 1038 reason_code = -smc->sk.sk_err; 1039 } 1040 } 1041 return reason_code; 1042 } 1043 1044 /* send CLC ACCEPT message across internal TCP socket */ 1045 int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, 1046 u8 version, u8 *negotiated_eid) 1047 { 1048 struct smc_clc_msg_accept_confirm_v2 aclc_v2; 1049 int len; 1050 1051 memset(&aclc_v2, 0, sizeof(aclc_v2)); 1052 aclc_v2.hdr.type = SMC_CLC_ACCEPT; 1053 len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact, 1054 version, negotiated_eid); 1055 if (len < ntohs(aclc_v2.hdr.length)) 1056 len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err; 1057 1058 return len > 0 ? 0 : len; 1059 } 1060 1061 void smc_clc_get_hostname(u8 **host) 1062 { 1063 *host = &smc_hostname[0]; 1064 } 1065 1066 void __init smc_clc_init(void) 1067 { 1068 struct new_utsname *u; 1069 1070 memset(smc_hostname, _S, sizeof(smc_hostname)); /* ASCII blanks */ 1071 u = utsname(); 1072 memcpy(smc_hostname, u->nodename, 1073 min_t(size_t, strlen(u->nodename), sizeof(smc_hostname))); 1074 1075 INIT_LIST_HEAD(&smc_clc_eid_table.list); 1076 rwlock_init(&smc_clc_eid_table.lock); 1077 smc_clc_eid_table.ueid_cnt = 0; 1078 smc_clc_eid_table.seid_enabled = 1; 1079 } 1080 1081 void smc_clc_exit(void) 1082 { 1083 smc_clc_ueid_remove(NULL); 1084 } 1085