1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * CLC (connection layer control) handshake over initial TCP socket to 6 * prepare for RDMA traffic 7 * 8 * Copyright IBM Corp. 2016, 2018 9 * 10 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 11 */ 12 13 #include <linux/in.h> 14 #include <linux/inetdevice.h> 15 #include <linux/if_ether.h> 16 #include <linux/sched/signal.h> 17 #include <linux/utsname.h> 18 #include <linux/ctype.h> 19 20 #include <net/addrconf.h> 21 #include <net/sock.h> 22 #include <net/tcp.h> 23 24 #include "smc.h" 25 #include "smc_core.h" 26 #include "smc_clc.h" 27 #include "smc_ib.h" 28 #include "smc_ism.h" 29 #include "smc_netlink.h" 30 31 #define SMCR_CLC_ACCEPT_CONFIRM_LEN 68 32 #define SMCD_CLC_ACCEPT_CONFIRM_LEN 48 33 #define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78 34 #define SMC_CLC_RECV_BUF_LEN 100 35 36 /* eye catcher "SMCR" EBCDIC for CLC messages */ 37 static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; 38 /* eye catcher "SMCD" EBCDIC for CLC messages */ 39 static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'}; 40 41 static u8 smc_hostname[SMC_MAX_HOSTNAME_LEN]; 42 43 struct smc_clc_eid_table { 44 rwlock_t lock; 45 struct list_head list; 46 u8 ueid_cnt; 47 u8 seid_enabled; 48 }; 49 50 static struct smc_clc_eid_table smc_clc_eid_table; 51 52 struct smc_clc_eid_entry { 53 struct list_head list; 54 u8 eid[SMC_MAX_EID_LEN]; 55 }; 56 57 /* The size of a user EID is 32 characters. 58 * Valid characters should be (single-byte character set) A-Z, 0-9, '.' and '-'. 59 * Blanks should only be used to pad to the expected size. 60 * First character must be alphanumeric. 61 */ 62 static bool smc_clc_ueid_valid(char *ueid) 63 { 64 char *end = ueid + SMC_MAX_EID_LEN; 65 66 while (--end >= ueid && isspace(*end)) 67 ; 68 if (end < ueid) 69 return false; 70 if (!isalnum(*ueid) || islower(*ueid)) 71 return false; 72 while (ueid <= end) { 73 if ((!isalnum(*ueid) || islower(*ueid)) && *ueid != '.' && 74 *ueid != '-') 75 return false; 76 ueid++; 77 } 78 return true; 79 } 80 81 static int smc_clc_ueid_add(char *ueid) 82 { 83 struct smc_clc_eid_entry *new_ueid, *tmp_ueid; 84 int rc; 85 86 if (!smc_clc_ueid_valid(ueid)) 87 return -EINVAL; 88 89 /* add a new ueid entry to the ueid table if there isn't one */ 90 new_ueid = kzalloc(sizeof(*new_ueid), GFP_KERNEL); 91 if (!new_ueid) 92 return -ENOMEM; 93 memcpy(new_ueid->eid, ueid, SMC_MAX_EID_LEN); 94 95 write_lock(&smc_clc_eid_table.lock); 96 if (smc_clc_eid_table.ueid_cnt >= SMC_MAX_UEID) { 97 rc = -ERANGE; 98 goto err_out; 99 } 100 list_for_each_entry(tmp_ueid, &smc_clc_eid_table.list, list) { 101 if (!memcmp(tmp_ueid->eid, ueid, SMC_MAX_EID_LEN)) { 102 rc = -EEXIST; 103 goto err_out; 104 } 105 } 106 list_add_tail(&new_ueid->list, &smc_clc_eid_table.list); 107 smc_clc_eid_table.ueid_cnt++; 108 write_unlock(&smc_clc_eid_table.lock); 109 return 0; 110 111 err_out: 112 write_unlock(&smc_clc_eid_table.lock); 113 kfree(new_ueid); 114 return rc; 115 } 116 117 int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info) 118 { 119 struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY]; 120 char *ueid; 121 122 if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1) 123 return -EINVAL; 124 ueid = (char *)nla_data(nla_ueid); 125 126 return smc_clc_ueid_add(ueid); 127 } 128 129 /* remove one or all ueid entries from the table */ 130 static int smc_clc_ueid_remove(char *ueid) 131 { 132 struct smc_clc_eid_entry *lst_ueid, *tmp_ueid; 133 int rc = -ENOENT; 134 135 /* remove table entry */ 136 write_lock(&smc_clc_eid_table.lock); 137 list_for_each_entry_safe(lst_ueid, tmp_ueid, &smc_clc_eid_table.list, 138 list) { 139 if (!ueid || !memcmp(lst_ueid->eid, ueid, SMC_MAX_EID_LEN)) { 140 list_del(&lst_ueid->list); 141 smc_clc_eid_table.ueid_cnt--; 142 kfree(lst_ueid); 143 rc = 0; 144 } 145 } 146 if (!rc && !smc_clc_eid_table.ueid_cnt) { 147 smc_clc_eid_table.seid_enabled = 1; 148 rc = -EAGAIN; /* indicate success and enabling of seid */ 149 } 150 write_unlock(&smc_clc_eid_table.lock); 151 return rc; 152 } 153 154 int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info) 155 { 156 struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY]; 157 char *ueid; 158 159 if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1) 160 return -EINVAL; 161 ueid = (char *)nla_data(nla_ueid); 162 163 return smc_clc_ueid_remove(ueid); 164 } 165 166 int smc_nl_flush_ueid(struct sk_buff *skb, struct genl_info *info) 167 { 168 smc_clc_ueid_remove(NULL); 169 return 0; 170 } 171 172 static int smc_nl_ueid_dumpinfo(struct sk_buff *skb, u32 portid, u32 seq, 173 u32 flags, char *ueid) 174 { 175 char ueid_str[SMC_MAX_EID_LEN + 1]; 176 void *hdr; 177 178 hdr = genlmsg_put(skb, portid, seq, &smc_gen_nl_family, 179 flags, SMC_NETLINK_DUMP_UEID); 180 if (!hdr) 181 return -ENOMEM; 182 snprintf(ueid_str, sizeof(ueid_str), "%s", ueid); 183 if (nla_put_string(skb, SMC_NLA_EID_TABLE_ENTRY, ueid_str)) { 184 genlmsg_cancel(skb, hdr); 185 return -EMSGSIZE; 186 } 187 genlmsg_end(skb, hdr); 188 return 0; 189 } 190 191 static int _smc_nl_ueid_dump(struct sk_buff *skb, u32 portid, u32 seq, 192 int start_idx) 193 { 194 struct smc_clc_eid_entry *lst_ueid; 195 int idx = 0; 196 197 read_lock(&smc_clc_eid_table.lock); 198 list_for_each_entry(lst_ueid, &smc_clc_eid_table.list, list) { 199 if (idx++ < start_idx) 200 continue; 201 if (smc_nl_ueid_dumpinfo(skb, portid, seq, NLM_F_MULTI, 202 lst_ueid->eid)) { 203 --idx; 204 break; 205 } 206 } 207 read_unlock(&smc_clc_eid_table.lock); 208 return idx; 209 } 210 211 int smc_nl_dump_ueid(struct sk_buff *skb, struct netlink_callback *cb) 212 { 213 struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); 214 int idx; 215 216 idx = _smc_nl_ueid_dump(skb, NETLINK_CB(cb->skb).portid, 217 cb->nlh->nlmsg_seq, cb_ctx->pos[0]); 218 219 cb_ctx->pos[0] = idx; 220 return skb->len; 221 } 222 223 int smc_nl_dump_seid(struct sk_buff *skb, struct netlink_callback *cb) 224 { 225 struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); 226 char seid_str[SMC_MAX_EID_LEN + 1]; 227 u8 seid_enabled; 228 void *hdr; 229 u8 *seid; 230 231 if (cb_ctx->pos[0]) 232 return skb->len; 233 234 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 235 &smc_gen_nl_family, NLM_F_MULTI, 236 SMC_NETLINK_DUMP_SEID); 237 if (!hdr) 238 return -ENOMEM; 239 if (!smc_ism_is_v2_capable()) 240 goto end; 241 242 smc_ism_get_system_eid(&seid); 243 snprintf(seid_str, sizeof(seid_str), "%s", seid); 244 if (nla_put_string(skb, SMC_NLA_SEID_ENTRY, seid_str)) 245 goto err; 246 read_lock(&smc_clc_eid_table.lock); 247 seid_enabled = smc_clc_eid_table.seid_enabled; 248 read_unlock(&smc_clc_eid_table.lock); 249 if (nla_put_u8(skb, SMC_NLA_SEID_ENABLED, seid_enabled)) 250 goto err; 251 end: 252 genlmsg_end(skb, hdr); 253 cb_ctx->pos[0]++; 254 return skb->len; 255 err: 256 genlmsg_cancel(skb, hdr); 257 return -EMSGSIZE; 258 } 259 260 int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info) 261 { 262 write_lock(&smc_clc_eid_table.lock); 263 smc_clc_eid_table.seid_enabled = 1; 264 write_unlock(&smc_clc_eid_table.lock); 265 return 0; 266 } 267 268 int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info) 269 { 270 int rc = 0; 271 272 write_lock(&smc_clc_eid_table.lock); 273 if (!smc_clc_eid_table.ueid_cnt) 274 rc = -ENOENT; 275 else 276 smc_clc_eid_table.seid_enabled = 0; 277 write_unlock(&smc_clc_eid_table.lock); 278 return rc; 279 } 280 281 static bool _smc_clc_match_ueid(u8 *peer_ueid) 282 { 283 struct smc_clc_eid_entry *tmp_ueid; 284 285 list_for_each_entry(tmp_ueid, &smc_clc_eid_table.list, list) { 286 if (!memcmp(tmp_ueid->eid, peer_ueid, SMC_MAX_EID_LEN)) 287 return true; 288 } 289 return false; 290 } 291 292 bool smc_clc_match_eid(u8 *negotiated_eid, 293 struct smc_clc_v2_extension *smc_v2_ext, 294 u8 *peer_eid, u8 *local_eid) 295 { 296 bool match = false; 297 int i; 298 299 negotiated_eid[0] = 0; 300 read_lock(&smc_clc_eid_table.lock); 301 if (smc_clc_eid_table.seid_enabled && 302 smc_v2_ext->hdr.flag.seid && 303 !memcmp(peer_eid, local_eid, SMC_MAX_EID_LEN)) { 304 memcpy(negotiated_eid, peer_eid, SMC_MAX_EID_LEN); 305 match = true; 306 goto out; 307 } 308 309 for (i = 0; i < smc_v2_ext->hdr.eid_cnt; i++) { 310 if (_smc_clc_match_ueid(smc_v2_ext->user_eids[i])) { 311 memcpy(negotiated_eid, smc_v2_ext->user_eids[i], 312 SMC_MAX_EID_LEN); 313 match = true; 314 goto out; 315 } 316 } 317 out: 318 read_unlock(&smc_clc_eid_table.lock); 319 return match; 320 } 321 322 /* check arriving CLC proposal */ 323 static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc) 324 { 325 struct smc_clc_msg_proposal_prefix *pclc_prfx; 326 struct smc_clc_smcd_v2_extension *smcd_v2_ext; 327 struct smc_clc_msg_hdr *hdr = &pclc->hdr; 328 struct smc_clc_v2_extension *v2_ext; 329 330 v2_ext = smc_get_clc_v2_ext(pclc); 331 pclc_prfx = smc_clc_proposal_get_prefix(pclc); 332 if (hdr->version == SMC_V1) { 333 if (hdr->typev1 == SMC_TYPE_N) 334 return false; 335 if (ntohs(hdr->length) != 336 sizeof(*pclc) + ntohs(pclc->iparea_offset) + 337 sizeof(*pclc_prfx) + 338 pclc_prfx->ipv6_prefixes_cnt * 339 sizeof(struct smc_clc_ipv6_prefix) + 340 sizeof(struct smc_clc_msg_trail)) 341 return false; 342 } else { 343 if (ntohs(hdr->length) != 344 sizeof(*pclc) + 345 sizeof(struct smc_clc_msg_smcd) + 346 (hdr->typev1 != SMC_TYPE_N ? 347 sizeof(*pclc_prfx) + 348 pclc_prfx->ipv6_prefixes_cnt * 349 sizeof(struct smc_clc_ipv6_prefix) : 0) + 350 (hdr->typev2 != SMC_TYPE_N ? 351 sizeof(*v2_ext) + 352 v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN : 0) + 353 (smcd_indicated(hdr->typev2) ? 354 sizeof(*smcd_v2_ext) + v2_ext->hdr.ism_gid_cnt * 355 sizeof(struct smc_clc_smcd_gid_chid) : 356 0) + 357 sizeof(struct smc_clc_msg_trail)) 358 return false; 359 } 360 return true; 361 } 362 363 /* check arriving CLC accept or confirm */ 364 static bool 365 smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2) 366 { 367 struct smc_clc_msg_hdr *hdr = &clc_v2->hdr; 368 369 if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D) 370 return false; 371 if (hdr->version == SMC_V1) { 372 if ((hdr->typev1 == SMC_TYPE_R && 373 ntohs(hdr->length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) || 374 (hdr->typev1 == SMC_TYPE_D && 375 ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN)) 376 return false; 377 } else { 378 if (hdr->typev1 == SMC_TYPE_D && 379 ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 && 380 (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 + 381 sizeof(struct smc_clc_first_contact_ext))) 382 return false; 383 } 384 return true; 385 } 386 387 static void smc_clc_fill_fce(struct smc_clc_first_contact_ext *fce, int *len) 388 { 389 memset(fce, 0, sizeof(*fce)); 390 fce->os_type = SMC_CLC_OS_LINUX; 391 fce->release = SMC_RELEASE; 392 memcpy(fce->hostname, smc_hostname, sizeof(smc_hostname)); 393 (*len) += sizeof(*fce); 394 } 395 396 /* check if received message has a correct header length and contains valid 397 * heading and trailing eyecatchers 398 */ 399 static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) 400 { 401 struct smc_clc_msg_accept_confirm_v2 *clc_v2; 402 struct smc_clc_msg_proposal *pclc; 403 struct smc_clc_msg_decline *dclc; 404 struct smc_clc_msg_trail *trl; 405 406 if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && 407 memcmp(clcm->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER))) 408 return false; 409 switch (clcm->type) { 410 case SMC_CLC_PROPOSAL: 411 pclc = (struct smc_clc_msg_proposal *)clcm; 412 if (!smc_clc_msg_prop_valid(pclc)) 413 return false; 414 trl = (struct smc_clc_msg_trail *) 415 ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl)); 416 break; 417 case SMC_CLC_ACCEPT: 418 case SMC_CLC_CONFIRM: 419 clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)clcm; 420 if (!smc_clc_msg_acc_conf_valid(clc_v2)) 421 return false; 422 trl = (struct smc_clc_msg_trail *) 423 ((u8 *)clc_v2 + ntohs(clc_v2->hdr.length) - 424 sizeof(*trl)); 425 break; 426 case SMC_CLC_DECLINE: 427 dclc = (struct smc_clc_msg_decline *)clcm; 428 if (ntohs(dclc->hdr.length) != sizeof(*dclc)) 429 return false; 430 trl = &dclc->trl; 431 break; 432 default: 433 return false; 434 } 435 if (check_trl && 436 memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && 437 memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER))) 438 return false; 439 return true; 440 } 441 442 /* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */ 443 static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4, 444 struct smc_clc_msg_proposal_prefix *prop) 445 { 446 struct in_device *in_dev = __in_dev_get_rcu(dst->dev); 447 const struct in_ifaddr *ifa; 448 449 if (!in_dev) 450 return -ENODEV; 451 452 in_dev_for_each_ifa_rcu(ifa, in_dev) { 453 if (!inet_ifa_match(ipv4, ifa)) 454 continue; 455 prop->prefix_len = inet_mask_len(ifa->ifa_mask); 456 prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask; 457 /* prop->ipv6_prefixes_cnt = 0; already done by memset before */ 458 return 0; 459 } 460 return -ENOENT; 461 } 462 463 /* fill CLC proposal msg with ipv6 prefixes from device */ 464 static int smc_clc_prfx_set6_rcu(struct dst_entry *dst, 465 struct smc_clc_msg_proposal_prefix *prop, 466 struct smc_clc_ipv6_prefix *ipv6_prfx) 467 { 468 #if IS_ENABLED(CONFIG_IPV6) 469 struct inet6_dev *in6_dev = __in6_dev_get(dst->dev); 470 struct inet6_ifaddr *ifa; 471 int cnt = 0; 472 473 if (!in6_dev) 474 return -ENODEV; 475 /* use a maximum of 8 IPv6 prefixes from device */ 476 list_for_each_entry(ifa, &in6_dev->addr_list, if_list) { 477 if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL) 478 continue; 479 ipv6_addr_prefix(&ipv6_prfx[cnt].prefix, 480 &ifa->addr, ifa->prefix_len); 481 ipv6_prfx[cnt].prefix_len = ifa->prefix_len; 482 cnt++; 483 if (cnt == SMC_CLC_MAX_V6_PREFIX) 484 break; 485 } 486 prop->ipv6_prefixes_cnt = cnt; 487 if (cnt) 488 return 0; 489 #endif 490 return -ENOENT; 491 } 492 493 /* retrieve and set prefixes in CLC proposal msg */ 494 static int smc_clc_prfx_set(struct socket *clcsock, 495 struct smc_clc_msg_proposal_prefix *prop, 496 struct smc_clc_ipv6_prefix *ipv6_prfx) 497 { 498 struct dst_entry *dst = sk_dst_get(clcsock->sk); 499 struct sockaddr_storage addrs; 500 struct sockaddr_in6 *addr6; 501 struct sockaddr_in *addr; 502 int rc = -ENOENT; 503 504 if (!dst) { 505 rc = -ENOTCONN; 506 goto out; 507 } 508 if (!dst->dev) { 509 rc = -ENODEV; 510 goto out_rel; 511 } 512 /* get address to which the internal TCP socket is bound */ 513 if (kernel_getsockname(clcsock, (struct sockaddr *)&addrs) < 0) 514 goto out_rel; 515 /* analyze IP specific data of net_device belonging to TCP socket */ 516 addr6 = (struct sockaddr_in6 *)&addrs; 517 rcu_read_lock(); 518 if (addrs.ss_family == PF_INET) { 519 /* IPv4 */ 520 addr = (struct sockaddr_in *)&addrs; 521 rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop); 522 } else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) { 523 /* mapped IPv4 address - peer is IPv4 only */ 524 rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3], 525 prop); 526 } else { 527 /* IPv6 */ 528 rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx); 529 } 530 rcu_read_unlock(); 531 out_rel: 532 dst_release(dst); 533 out: 534 return rc; 535 } 536 537 /* match ipv4 addrs of dev against addr in CLC proposal */ 538 static int smc_clc_prfx_match4_rcu(struct net_device *dev, 539 struct smc_clc_msg_proposal_prefix *prop) 540 { 541 struct in_device *in_dev = __in_dev_get_rcu(dev); 542 const struct in_ifaddr *ifa; 543 544 if (!in_dev) 545 return -ENODEV; 546 in_dev_for_each_ifa_rcu(ifa, in_dev) { 547 if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) && 548 inet_ifa_match(prop->outgoing_subnet, ifa)) 549 return 0; 550 } 551 552 return -ENOENT; 553 } 554 555 /* match ipv6 addrs of dev against addrs in CLC proposal */ 556 static int smc_clc_prfx_match6_rcu(struct net_device *dev, 557 struct smc_clc_msg_proposal_prefix *prop) 558 { 559 #if IS_ENABLED(CONFIG_IPV6) 560 struct inet6_dev *in6_dev = __in6_dev_get(dev); 561 struct smc_clc_ipv6_prefix *ipv6_prfx; 562 struct inet6_ifaddr *ifa; 563 int i, max; 564 565 if (!in6_dev) 566 return -ENODEV; 567 /* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */ 568 ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop)); 569 max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX); 570 list_for_each_entry(ifa, &in6_dev->addr_list, if_list) { 571 if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL) 572 continue; 573 for (i = 0; i < max; i++) { 574 if (ifa->prefix_len == ipv6_prfx[i].prefix_len && 575 ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix, 576 ifa->prefix_len)) 577 return 0; 578 } 579 } 580 #endif 581 return -ENOENT; 582 } 583 584 /* check if proposed prefixes match one of our device prefixes */ 585 int smc_clc_prfx_match(struct socket *clcsock, 586 struct smc_clc_msg_proposal_prefix *prop) 587 { 588 struct dst_entry *dst = sk_dst_get(clcsock->sk); 589 int rc; 590 591 if (!dst) { 592 rc = -ENOTCONN; 593 goto out; 594 } 595 if (!dst->dev) { 596 rc = -ENODEV; 597 goto out_rel; 598 } 599 rcu_read_lock(); 600 if (!prop->ipv6_prefixes_cnt) 601 rc = smc_clc_prfx_match4_rcu(dst->dev, prop); 602 else 603 rc = smc_clc_prfx_match6_rcu(dst->dev, prop); 604 rcu_read_unlock(); 605 out_rel: 606 dst_release(dst); 607 out: 608 return rc; 609 } 610 611 /* Wait for data on the tcp-socket, analyze received data 612 * Returns: 613 * 0 if success and it was not a decline that we received. 614 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send. 615 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise. 616 */ 617 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, 618 u8 expected_type, unsigned long timeout) 619 { 620 long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo; 621 struct sock *clc_sk = smc->clcsock->sk; 622 struct smc_clc_msg_hdr *clcm = buf; 623 struct msghdr msg = {NULL, 0}; 624 int reason_code = 0; 625 struct kvec vec = {buf, buflen}; 626 int len, datlen, recvlen; 627 bool check_trl = true; 628 int krflags; 629 630 /* peek the first few bytes to determine length of data to receive 631 * so we don't consume any subsequent CLC message or payload data 632 * in the TCP byte stream 633 */ 634 /* 635 * Caller must make sure that buflen is no less than 636 * sizeof(struct smc_clc_msg_hdr) 637 */ 638 krflags = MSG_PEEK | MSG_WAITALL; 639 clc_sk->sk_rcvtimeo = timeout; 640 iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, 641 sizeof(struct smc_clc_msg_hdr)); 642 len = sock_recvmsg(smc->clcsock, &msg, krflags); 643 if (signal_pending(current)) { 644 reason_code = -EINTR; 645 clc_sk->sk_err = EINTR; 646 smc->sk.sk_err = EINTR; 647 goto out; 648 } 649 if (clc_sk->sk_err) { 650 reason_code = -clc_sk->sk_err; 651 if (clc_sk->sk_err == EAGAIN && 652 expected_type == SMC_CLC_DECLINE) 653 clc_sk->sk_err = 0; /* reset for fallback usage */ 654 else 655 smc->sk.sk_err = clc_sk->sk_err; 656 goto out; 657 } 658 if (!len) { /* peer has performed orderly shutdown */ 659 smc->sk.sk_err = ECONNRESET; 660 reason_code = -ECONNRESET; 661 goto out; 662 } 663 if (len < 0) { 664 if (len != -EAGAIN || expected_type != SMC_CLC_DECLINE) 665 smc->sk.sk_err = -len; 666 reason_code = len; 667 goto out; 668 } 669 datlen = ntohs(clcm->length); 670 if ((len < sizeof(struct smc_clc_msg_hdr)) || 671 (clcm->version < SMC_V1) || 672 ((clcm->type != SMC_CLC_DECLINE) && 673 (clcm->type != expected_type))) { 674 smc->sk.sk_err = EPROTO; 675 reason_code = -EPROTO; 676 goto out; 677 } 678 679 /* receive the complete CLC message */ 680 memset(&msg, 0, sizeof(struct msghdr)); 681 if (datlen > buflen) { 682 check_trl = false; 683 recvlen = buflen; 684 } else { 685 recvlen = datlen; 686 } 687 iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen); 688 krflags = MSG_WAITALL; 689 len = sock_recvmsg(smc->clcsock, &msg, krflags); 690 if (len < recvlen || !smc_clc_msg_hdr_valid(clcm, check_trl)) { 691 smc->sk.sk_err = EPROTO; 692 reason_code = -EPROTO; 693 goto out; 694 } 695 datlen -= len; 696 while (datlen) { 697 u8 tmp[SMC_CLC_RECV_BUF_LEN]; 698 699 vec.iov_base = &tmp; 700 vec.iov_len = SMC_CLC_RECV_BUF_LEN; 701 /* receive remaining proposal message */ 702 recvlen = datlen > SMC_CLC_RECV_BUF_LEN ? 703 SMC_CLC_RECV_BUF_LEN : datlen; 704 iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen); 705 len = sock_recvmsg(smc->clcsock, &msg, krflags); 706 datlen -= len; 707 } 708 if (clcm->type == SMC_CLC_DECLINE) { 709 struct smc_clc_msg_decline *dclc; 710 711 dclc = (struct smc_clc_msg_decline *)clcm; 712 reason_code = SMC_CLC_DECL_PEERDECL; 713 smc->peer_diagnosis = ntohl(dclc->peer_diagnosis); 714 if (((struct smc_clc_msg_decline *)buf)->hdr.typev2 & 715 SMC_FIRST_CONTACT_MASK) { 716 smc->conn.lgr->sync_err = 1; 717 smc_lgr_terminate_sched(smc->conn.lgr); 718 } 719 } 720 721 out: 722 clc_sk->sk_rcvtimeo = rcvtimeo; 723 return reason_code; 724 } 725 726 /* send CLC DECLINE message across internal TCP socket */ 727 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version) 728 { 729 struct smc_clc_msg_decline dclc; 730 struct msghdr msg; 731 struct kvec vec; 732 int len; 733 734 memset(&dclc, 0, sizeof(dclc)); 735 memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 736 dclc.hdr.type = SMC_CLC_DECLINE; 737 dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); 738 dclc.hdr.version = version; 739 dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX; 740 dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 741 SMC_FIRST_CONTACT_MASK : 0; 742 if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) && 743 smc_ib_is_valid_local_systemid()) 744 memcpy(dclc.id_for_peer, local_systemid, 745 sizeof(local_systemid)); 746 dclc.peer_diagnosis = htonl(peer_diag_info); 747 memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 748 749 memset(&msg, 0, sizeof(msg)); 750 vec.iov_base = &dclc; 751 vec.iov_len = sizeof(struct smc_clc_msg_decline); 752 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, 753 sizeof(struct smc_clc_msg_decline)); 754 if (len < 0 || len < sizeof(struct smc_clc_msg_decline)) 755 len = -EPROTO; 756 return len > 0 ? 0 : len; 757 } 758 759 /* send CLC PROPOSAL message across internal TCP socket */ 760 int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) 761 { 762 struct smc_clc_smcd_v2_extension *smcd_v2_ext; 763 struct smc_clc_msg_proposal_prefix *pclc_prfx; 764 struct smc_clc_msg_proposal *pclc_base; 765 struct smc_clc_smcd_gid_chid *gidchids; 766 struct smc_clc_msg_proposal_area *pclc; 767 struct smc_clc_ipv6_prefix *ipv6_prfx; 768 struct smc_clc_v2_extension *v2_ext; 769 struct smc_clc_msg_smcd *pclc_smcd; 770 struct smc_clc_msg_trail *trl; 771 int len, i, plen, rc; 772 int reason_code = 0; 773 struct kvec vec[8]; 774 struct msghdr msg; 775 776 pclc = kzalloc(sizeof(*pclc), GFP_KERNEL); 777 if (!pclc) 778 return -ENOMEM; 779 780 pclc_base = &pclc->pclc_base; 781 pclc_smcd = &pclc->pclc_smcd; 782 pclc_prfx = &pclc->pclc_prfx; 783 ipv6_prfx = pclc->pclc_prfx_ipv6; 784 v2_ext = &pclc->pclc_v2_ext; 785 smcd_v2_ext = &pclc->pclc_smcd_v2_ext; 786 gidchids = pclc->pclc_gidchids; 787 trl = &pclc->pclc_trl; 788 789 pclc_base->hdr.version = SMC_V2; 790 pclc_base->hdr.typev1 = ini->smc_type_v1; 791 pclc_base->hdr.typev2 = ini->smc_type_v2; 792 plen = sizeof(*pclc_base) + sizeof(*pclc_smcd) + sizeof(*trl); 793 794 /* retrieve ip prefixes for CLC proposal msg */ 795 if (ini->smc_type_v1 != SMC_TYPE_N) { 796 rc = smc_clc_prfx_set(smc->clcsock, pclc_prfx, ipv6_prfx); 797 if (rc) { 798 if (ini->smc_type_v2 == SMC_TYPE_N) { 799 kfree(pclc); 800 return SMC_CLC_DECL_CNFERR; 801 } 802 pclc_base->hdr.typev1 = SMC_TYPE_N; 803 } else { 804 pclc_base->iparea_offset = htons(sizeof(*pclc_smcd)); 805 plen += sizeof(*pclc_prfx) + 806 pclc_prfx->ipv6_prefixes_cnt * 807 sizeof(ipv6_prfx[0]); 808 } 809 } 810 811 /* build SMC Proposal CLC message */ 812 memcpy(pclc_base->hdr.eyecatcher, SMC_EYECATCHER, 813 sizeof(SMC_EYECATCHER)); 814 pclc_base->hdr.type = SMC_CLC_PROPOSAL; 815 if (smcr_indicated(ini->smc_type_v1)) { 816 /* add SMC-R specifics */ 817 memcpy(pclc_base->lcl.id_for_peer, local_systemid, 818 sizeof(local_systemid)); 819 memcpy(pclc_base->lcl.gid, ini->ib_gid, SMC_GID_SIZE); 820 memcpy(pclc_base->lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1], 821 ETH_ALEN); 822 } 823 if (smcd_indicated(ini->smc_type_v1)) { 824 /* add SMC-D specifics */ 825 if (ini->ism_dev[0]) { 826 pclc_smcd->ism.gid = htonll(ini->ism_dev[0]->local_gid); 827 pclc_smcd->ism.chid = 828 htons(smc_ism_get_chid(ini->ism_dev[0])); 829 } 830 } 831 if (ini->smc_type_v2 == SMC_TYPE_N) { 832 pclc_smcd->v2_ext_offset = 0; 833 } else { 834 struct smc_clc_eid_entry *ueident; 835 u16 v2_ext_offset; 836 u8 *eid = NULL; 837 838 v2_ext_offset = sizeof(*pclc_smcd) - 839 offsetofend(struct smc_clc_msg_smcd, v2_ext_offset); 840 if (ini->smc_type_v1 != SMC_TYPE_N) 841 v2_ext_offset += sizeof(*pclc_prfx) + 842 pclc_prfx->ipv6_prefixes_cnt * 843 sizeof(ipv6_prfx[0]); 844 pclc_smcd->v2_ext_offset = htons(v2_ext_offset); 845 846 read_lock(&smc_clc_eid_table.lock); 847 v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt; 848 plen += smc_clc_eid_table.ueid_cnt * SMC_MAX_EID_LEN; 849 i = 0; 850 list_for_each_entry(ueident, &smc_clc_eid_table.list, list) { 851 memcpy(v2_ext->user_eids[i++], ueident->eid, 852 sizeof(ueident->eid)); 853 } 854 v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled; 855 read_unlock(&smc_clc_eid_table.lock); 856 v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt; 857 v2_ext->hdr.flag.release = SMC_RELEASE; 858 v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) - 859 offsetofend(struct smc_clnt_opts_area_hdr, 860 smcd_v2_ext_offset) + 861 v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN); 862 smc_ism_get_system_eid(&eid); 863 if (eid && v2_ext->hdr.flag.seid) 864 memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN); 865 plen += sizeof(*v2_ext) + sizeof(*smcd_v2_ext); 866 if (ini->ism_offered_cnt) { 867 for (i = 1; i <= ini->ism_offered_cnt; i++) { 868 gidchids[i - 1].gid = 869 htonll(ini->ism_dev[i]->local_gid); 870 gidchids[i - 1].chid = 871 htons(smc_ism_get_chid(ini->ism_dev[i])); 872 } 873 plen += ini->ism_offered_cnt * 874 sizeof(struct smc_clc_smcd_gid_chid); 875 } 876 } 877 pclc_base->hdr.length = htons(plen); 878 memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 879 880 /* send SMC Proposal CLC message */ 881 memset(&msg, 0, sizeof(msg)); 882 i = 0; 883 vec[i].iov_base = pclc_base; 884 vec[i++].iov_len = sizeof(*pclc_base); 885 vec[i].iov_base = pclc_smcd; 886 vec[i++].iov_len = sizeof(*pclc_smcd); 887 if (ini->smc_type_v1 != SMC_TYPE_N) { 888 vec[i].iov_base = pclc_prfx; 889 vec[i++].iov_len = sizeof(*pclc_prfx); 890 if (pclc_prfx->ipv6_prefixes_cnt > 0) { 891 vec[i].iov_base = ipv6_prfx; 892 vec[i++].iov_len = pclc_prfx->ipv6_prefixes_cnt * 893 sizeof(ipv6_prfx[0]); 894 } 895 } 896 if (ini->smc_type_v2 != SMC_TYPE_N) { 897 vec[i].iov_base = v2_ext; 898 vec[i++].iov_len = sizeof(*v2_ext) + 899 (v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN); 900 vec[i].iov_base = smcd_v2_ext; 901 vec[i++].iov_len = sizeof(*smcd_v2_ext); 902 if (ini->ism_offered_cnt) { 903 vec[i].iov_base = gidchids; 904 vec[i++].iov_len = ini->ism_offered_cnt * 905 sizeof(struct smc_clc_smcd_gid_chid); 906 } 907 } 908 vec[i].iov_base = trl; 909 vec[i++].iov_len = sizeof(*trl); 910 /* due to the few bytes needed for clc-handshake this cannot block */ 911 len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen); 912 if (len < 0) { 913 smc->sk.sk_err = smc->clcsock->sk->sk_err; 914 reason_code = -smc->sk.sk_err; 915 } else if (len < ntohs(pclc_base->hdr.length)) { 916 reason_code = -ENETUNREACH; 917 smc->sk.sk_err = -reason_code; 918 } 919 920 kfree(pclc); 921 return reason_code; 922 } 923 924 /* build and send CLC CONFIRM / ACCEPT message */ 925 static int smc_clc_send_confirm_accept(struct smc_sock *smc, 926 struct smc_clc_msg_accept_confirm_v2 *clc_v2, 927 int first_contact, u8 version, 928 u8 *eid) 929 { 930 struct smc_connection *conn = &smc->conn; 931 struct smc_clc_msg_accept_confirm *clc; 932 struct smc_clc_first_contact_ext fce; 933 struct smc_clc_msg_trail trl; 934 struct kvec vec[3]; 935 struct msghdr msg; 936 int i, len; 937 938 /* send SMC Confirm CLC msg */ 939 clc = (struct smc_clc_msg_accept_confirm *)clc_v2; 940 clc->hdr.version = version; /* SMC version */ 941 if (first_contact) 942 clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK; 943 if (conn->lgr->is_smcd) { 944 /* SMC-D specific settings */ 945 memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER, 946 sizeof(SMCD_EYECATCHER)); 947 clc->hdr.typev1 = SMC_TYPE_D; 948 clc->d0.gid = conn->lgr->smcd->local_gid; 949 clc->d0.token = conn->rmb_desc->token; 950 clc->d0.dmbe_size = conn->rmbe_size_short; 951 clc->d0.dmbe_idx = 0; 952 memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); 953 if (version == SMC_V1) { 954 clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); 955 } else { 956 clc_v2->chid = htons(smc_ism_get_chid(conn->lgr->smcd)); 957 if (eid[0]) 958 memcpy(clc_v2->eid, eid, SMC_MAX_EID_LEN); 959 len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; 960 if (first_contact) 961 smc_clc_fill_fce(&fce, &len); 962 clc_v2->hdr.length = htons(len); 963 } 964 memcpy(trl.eyecatcher, SMCD_EYECATCHER, 965 sizeof(SMCD_EYECATCHER)); 966 } else { 967 struct smc_link *link = conn->lnk; 968 969 /* SMC-R specific settings */ 970 link = conn->lnk; 971 memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER, 972 sizeof(SMC_EYECATCHER)); 973 clc->hdr.typev1 = SMC_TYPE_R; 974 clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); 975 memcpy(clc->r0.lcl.id_for_peer, local_systemid, 976 sizeof(local_systemid)); 977 memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE); 978 memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1], 979 ETH_ALEN); 980 hton24(clc->r0.qpn, link->roce_qp->qp_num); 981 clc->r0.rmb_rkey = 982 htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey); 983 clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ 984 clc->r0.rmbe_alert_token = htonl(conn->alert_token_local); 985 switch (clc->hdr.type) { 986 case SMC_CLC_ACCEPT: 987 clc->r0.qp_mtu = link->path_mtu; 988 break; 989 case SMC_CLC_CONFIRM: 990 clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu); 991 break; 992 } 993 clc->r0.rmbe_size = conn->rmbe_size_short; 994 clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address 995 (conn->rmb_desc->sgt[link->link_idx].sgl)); 996 hton24(clc->r0.psn, link->psn_initial); 997 memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 998 } 999 1000 memset(&msg, 0, sizeof(msg)); 1001 i = 0; 1002 vec[i].iov_base = clc_v2; 1003 if (version > SMC_V1) 1004 vec[i++].iov_len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 - sizeof(trl); 1005 else 1006 vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? 1007 SMCD_CLC_ACCEPT_CONFIRM_LEN : 1008 SMCR_CLC_ACCEPT_CONFIRM_LEN) - 1009 sizeof(trl); 1010 if (version > SMC_V1 && first_contact) { 1011 vec[i].iov_base = &fce; 1012 vec[i++].iov_len = sizeof(fce); 1013 } 1014 vec[i].iov_base = &trl; 1015 vec[i++].iov_len = sizeof(trl); 1016 return kernel_sendmsg(smc->clcsock, &msg, vec, 1, 1017 ntohs(clc->hdr.length)); 1018 } 1019 1020 /* send CLC CONFIRM message across internal TCP socket */ 1021 int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, 1022 u8 version, u8 *eid) 1023 { 1024 struct smc_clc_msg_accept_confirm_v2 cclc_v2; 1025 int reason_code = 0; 1026 int len; 1027 1028 /* send SMC Confirm CLC msg */ 1029 memset(&cclc_v2, 0, sizeof(cclc_v2)); 1030 cclc_v2.hdr.type = SMC_CLC_CONFIRM; 1031 len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact, 1032 version, eid); 1033 if (len < ntohs(cclc_v2.hdr.length)) { 1034 if (len >= 0) { 1035 reason_code = -ENETUNREACH; 1036 smc->sk.sk_err = -reason_code; 1037 } else { 1038 smc->sk.sk_err = smc->clcsock->sk->sk_err; 1039 reason_code = -smc->sk.sk_err; 1040 } 1041 } 1042 return reason_code; 1043 } 1044 1045 /* send CLC ACCEPT message across internal TCP socket */ 1046 int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, 1047 u8 version, u8 *negotiated_eid) 1048 { 1049 struct smc_clc_msg_accept_confirm_v2 aclc_v2; 1050 int len; 1051 1052 memset(&aclc_v2, 0, sizeof(aclc_v2)); 1053 aclc_v2.hdr.type = SMC_CLC_ACCEPT; 1054 len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact, 1055 version, negotiated_eid); 1056 if (len < ntohs(aclc_v2.hdr.length)) 1057 len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err; 1058 1059 return len > 0 ? 0 : len; 1060 } 1061 1062 void smc_clc_get_hostname(u8 **host) 1063 { 1064 *host = &smc_hostname[0]; 1065 } 1066 1067 void __init smc_clc_init(void) 1068 { 1069 struct new_utsname *u; 1070 1071 memset(smc_hostname, _S, sizeof(smc_hostname)); /* ASCII blanks */ 1072 u = utsname(); 1073 memcpy(smc_hostname, u->nodename, 1074 min_t(size_t, strlen(u->nodename), sizeof(smc_hostname))); 1075 1076 INIT_LIST_HEAD(&smc_clc_eid_table.list); 1077 rwlock_init(&smc_clc_eid_table.lock); 1078 smc_clc_eid_table.ueid_cnt = 0; 1079 smc_clc_eid_table.seid_enabled = 1; 1080 } 1081 1082 void smc_clc_exit(void) 1083 { 1084 smc_clc_ueid_remove(NULL); 1085 } 1086