// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Basic Transport Functions exploiting Infiniband API
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"

#define SMC_LGR_NUM_INCR		256
#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
#define SMC_LGR_FREE_DELAY_FAST		(8 * HZ)

static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.num = 0,
};

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	mod_delayed_work(system_wq, &lgr->free_work,
			 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
			 SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
}

void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
	mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST);
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @conn	connection to register
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
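 * Tokens are taken from a single global counter; values already in use in
 * this link group are skipped via smc_lgr_find_conn(), and a counter wrap
 * to zero simply triggers another pick in the loop below.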
 */
static void smc_lgr_register_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);

	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);
	conn->lgr->conns_num++;
}

/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	lgr->conns_num--;
	conn->alert_token_local = 0;
	conn->lgr = NULL;
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local) {
		__smc_lgr_unregister_conn(conn);
	}
	write_unlock_bh(&lgr->conns_lock);
}

/* Send delete link, either as client to request the initiation
 * of the DELETE LINK sequence from server; or as server to
 * initiate the delete processing. See smc_llc_rx_delete_link().
 */
static int smc_link_send_delete(struct smc_link *lnk)
{
	if (lnk->state == SMC_LNK_ACTIVE &&
	    !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true)) {
		smc_llc_link_deleting(lnk);
		return 0;
	}
	return -ENOTCONN;
}

static void smc_lgr_free(struct smc_link_group *lgr);

static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	bool conns;

	spin_lock_bh(&smc_lgr_list.lock);
	if (list_empty(&lgr->list))
		goto free;
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(&smc_lgr_list.lock);
		return;
	}
	list_del_init(&lgr->list); /* remove from smc_lgr_list */
free:
	spin_unlock_bh(&smc_lgr_list.lock);

	if (!lgr->is_smcd && !lgr->terminating) {
		struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

		/* try to send del link msg, on error free lgr immediately */
		if (lnk->state == SMC_LNK_ACTIVE &&
		    !smc_link_send_delete(lnk)) {
			/* reschedule in case we never receive a response */
			smc_lgr_schedule_free_work(lgr);
			return;
		}
	}

	if (!delayed_work_pending(&lgr->free_work)) {
		struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

		if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
			smc_llc_link_inactive(lnk);
		if (lgr->is_smcd)
			smc_ism_signal_shutdown(lgr);
		smc_lgr_free(lgr);
	}
}

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,
			  struct smc_ib_device *smcibdev, u8 ibport,
			  char *peer_systemid, unsigned short vlan_id,
			  struct smcd_dev *smcismdev, u64 peer_gid)
{
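	/* For SMC-R, the single link is brought up in several steps below:
	 * determine the GID for the VLAN, initialize the LLC link, allocate
	 * work-request memory, then create the protection domain, queue pair
	 * and work-request link; each error path unwinds the steps already
	 * done via the labels at the end of this function.
	 */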
	struct smc_link_group *lgr;
	struct smc_link *lnk;
	u8 rndvec[3];
	int rc = 0;
	int i;

	if (is_smcd && vlan_id) {
		rc = smc_ism_get_vlan(smcismdev, vlan_id);
		if (rc)
			goto out;
	}

	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
		rc = -ENOMEM;
		goto out;
	}
	lgr->is_smcd = is_smcd;
	lgr->sync_err = 0;
	lgr->vlan_id = vlan_id;
	rwlock_init(&lgr->sndbufs_lock);
	rwlock_init(&lgr->rmbs_lock);
	rwlock_init(&lgr->conns_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
	smc_lgr_list.num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	lgr->conns_all = RB_ROOT;
	if (is_smcd) {
		/* SMC-D specific settings */
		lgr->peer_gid = peer_gid;
		lgr->smcd = smcismdev;
	} else {
		/* SMC-R specific settings */
		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
		memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);

		lnk = &lgr->lnk[SMC_SINGLE_LINK];
		/* initialize link */
		lnk->state = SMC_LNK_ACTIVATING;
		lnk->link_id = SMC_SINGLE_LINK;
		lnk->smcibdev = smcibdev;
		lnk->ibport = ibport;
		lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
		if (!smcibdev->initialized)
			smc_ib_setup_per_ibdev(smcibdev);
		get_random_bytes(rndvec, sizeof(rndvec));
		lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
				   (rndvec[2] << 16);
		rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
					  vlan_id, lnk->gid, &lnk->sgid_index);
		if (rc)
			goto free_lgr;
		rc = smc_llc_link_init(lnk);
		if (rc)
			goto free_lgr;
		rc = smc_wr_alloc_link_mem(lnk);
		if (rc)
			goto clear_llc_lnk;
		rc = smc_ib_create_protection_domain(lnk);
		if (rc)
			goto free_link_mem;
		rc = smc_ib_create_queue_pair(lnk);
		if (rc)
			goto dealloc_pd;
		rc = smc_wr_create_link(lnk);
		if (rc)
			goto destroy_qp;
	}
	smc->conn.lgr = lgr;
	spin_lock_bh(&smc_lgr_list.lock);
	list_add(&lgr->list, &smc_lgr_list.list);
	spin_unlock_bh(&smc_lgr_list.lock);
	return 0;

destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
clear_llc_lnk:
	smc_llc_link_clear(lnk);
free_lgr:
	kfree(lgr);
out:
	return rc;
}

static void smc_buf_unuse(struct smc_connection *conn,
			  struct smc_link_group *lgr)
{
	if (conn->sndbuf_desc)
		conn->sndbuf_desc->used = 0;
	if (conn->rmb_desc) {
		if (!conn->rmb_desc->regerr) {
			if (!lgr->is_smcd) {
				/* unregister rmb with peer */
				smc_llc_do_delete_rkey(
						&lgr->lnk[SMC_SINGLE_LINK],
						conn->rmb_desc);
			}
			conn->rmb_desc->used = 0;
		} else {
			/* buf registration failed, reuse not possible */
			write_lock_bh(&lgr->rmbs_lock);
			list_del(&conn->rmb_desc->list);
			write_unlock_bh(&lgr->rmbs_lock);

			smc_buf_free(lgr, true, conn->rmb_desc);
		}
	}
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	if (lgr->is_smcd) {
		smc_ism_unset_conn(conn);
		tasklet_kill(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
	}
	smc_lgr_unregister_conn(conn);	/* unsets conn->lgr */
	smc_buf_unuse(conn, lgr);	/* allow buffer reuse */
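
	/* the link group itself is not freed here; once its last connection
	 * is gone it is merely scheduled for freeing via the delayed
	 * free_work, so that new connections may still reuse it
	 */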
	if (!lgr->conns_num)
		smc_lgr_schedule_free_work(lgr);
}

static void smc_link_clear(struct smc_link *lnk)
{
	lnk->peer_qpn = 0;
	smc_llc_link_clear(lnk);
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
}

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
			  struct smc_buf_desc *buf_desc)
{
	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

	if (is_rmb) {
		if (buf_desc->mr_rx[SMC_SINGLE_LINK])
			smc_ib_put_memory_region(
					buf_desc->mr_rx[SMC_SINGLE_LINK]);
		smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
				    DMA_FROM_DEVICE);
	} else {
		smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
				    DMA_TO_DEVICE);
	}
	sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
	if (buf_desc->pages)
		__free_pages(buf_desc->pages, buf_desc->order);
	kfree(buf_desc);
}

static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
			  struct smc_buf_desc *buf_desc)
{
	if (is_dmb) {
		/* restore original buf len */
		buf_desc->len += sizeof(struct smcd_cdc_msg);
		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
	} else {
		kfree(buf_desc->cpu_addr);
	}
	kfree(buf_desc);
}

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc)
{
	if (lgr->is_smcd)
		smcd_buf_free(lgr, is_rmb, buf_desc);
	else
		smcr_buf_free(lgr, is_rmb, buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf_desc;
	struct list_head *buf_list;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		if (is_rmb)
			buf_list = &lgr->rmbs[i];
		else
			buf_list = &lgr->sndbufs[i];
		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
					 list) {
			list_del(&buf_desc->list);
			smc_buf_free(lgr, is_rmb, buf_desc);
		}
	}
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	/* free send buffers */
	__smc_lgr_free_bufs(lgr, false);
	/* free rmbs */
	__smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
	smc_lgr_free_bufs(lgr);
	if (lgr->is_smcd)
		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
	else
		smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
	kfree(lgr);
}

void smc_lgr_forget(struct smc_link_group *lgr)
{
	spin_lock_bh(&smc_lgr_list.lock);
	/* do not use this link group for new connections */
	if (!list_empty(&lgr->list))
		list_del_init(&lgr->list);
	spin_unlock_bh(&smc_lgr_list.lock);
}

/* terminate link group abnormally */
static void __smc_lgr_terminate(struct smc_link_group *lgr)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	if (lgr->terminating)
		return;	/* lgr already terminating */
	lgr->terminating = 1;
	if (!list_empty(&lgr->list)) /* forget lgr */
		list_del_init(&lgr->list);
	if (!lgr->is_smcd)
		smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);

	write_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put in close work */
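		/* conns_lock is dropped around schedule_work() below; if the
		 * close work is already queued, the socket reference taken
		 * above is released again immediately
		 */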
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
		__smc_lgr_unregister_conn(conn);
		write_unlock_bh(&lgr->conns_lock);
		if (!schedule_work(&conn->close_work))
			sock_put(&smc->sk);
		write_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	write_unlock_bh(&lgr->conns_lock);
	if (!lgr->is_smcd)
		wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
	smc_lgr_schedule_free_work(lgr);
}

void smc_lgr_terminate(struct smc_link_group *lgr)
{
	spin_lock_bh(&smc_lgr_list.lock);
	__smc_lgr_terminate(lgr);
	spin_unlock_bh(&smc_lgr_list.lock);
}

/* Called when IB port is terminated */
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *l;

	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
		if (!lgr->is_smcd &&
		    lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
		    lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
			__smc_lgr_terminate(lgr);
	}
	spin_unlock_bh(&smc_lgr_list.lock);
}

/* Called when SMC-D device is terminated or peer is lost */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);

	/* run common cleanup function and build free list */
	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
		if (lgr->is_smcd && lgr->smcd == dev &&
		    (!peer_gid || lgr->peer_gid == peer_gid) &&
		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
			__smc_lgr_terminate(lgr);
			list_move(&lgr->list, &lgr_free_list);
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	/* cancel the regular free workers and actually free lgrs */
	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		cancel_delayed_work_sync(&lgr->free_work);
		if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
			smc_ism_signal_shutdown(lgr);
		smc_lgr_free(lgr);
	}
}

/* Determine vlan of internal TCP socket.
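 * If the route's device is itself a VLAN device, its VLAN id is used;
 * otherwise the lower devices are walked (under rtnl_lock) until a VLAN
 * device is found. *vlan_id stays 0 if no VLAN is involved.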
 * @vlan_id: address to store the determined vlan id into
 */
int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct net_device *ndev;
	int i, nest_lvl, rc = 0;

	*vlan_id = 0;
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	ndev = dst->dev;
	if (is_vlan_dev(ndev)) {
		*vlan_id = vlan_dev_vlan_id(ndev);
		goto out_rel;
	}

	rtnl_lock();
	nest_lvl = dev_get_nest_level(ndev);
	for (i = 0; i < nest_lvl; i++) {
		struct list_head *lower = &ndev->adj_list.lower;

		if (list_empty(lower))
			break;
		lower = lower->next;
		ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
		if (is_vlan_dev(ndev)) {
			*vlan_id = vlan_dev_vlan_id(ndev);
			break;
		}
	}
	rtnl_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

static bool smcr_lgr_match(struct smc_link_group *lgr,
			   struct smc_clc_msg_local *lcl,
			   enum smc_lgr_role role, u32 clcqpn)
{
	return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
		       SMC_SYSTEMID_LEN) &&
		!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
			SMC_GID_SIZE) &&
		!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
			sizeof(lcl->mac)) &&
		lgr->role == role &&
		(lgr->role == SMC_SERV ||
		 lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
}

static bool smcd_lgr_match(struct smc_link_group *lgr,
			   struct smcd_dev *smcismdev, u64 peer_gid)
{
	return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
		    struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
		    struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
		    u64 peer_gid)
{
	struct smc_connection *conn = &smc->conn;
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_link_group *lgr;
	unsigned short vlan_id;
	enum smc_lgr_role role;
	int rc = 0;

	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id);
	if (rc)
		return rc;

	if ((role == SMC_CLNT) && srv_first_contact)
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry(lgr, &smc_lgr_list.list, list) {
		write_lock_bh(&lgr->conns_lock);
		if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) :
		     smcr_lgr_match(lgr, lcl, role, clcqpn)) &&
		    !lgr->sync_err &&
		    lgr->vlan_id == vlan_id &&
		    (role == SMC_CLNT ||
		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
			/* link group found */
			local_contact = SMC_REUSE_CONTACT;
			conn->lgr = lgr;
			smc_lgr_register_conn(conn); /* add smc conn to lgr */
			if (delayed_work_pending(&lgr->free_work))
				cancel_delayed_work(&lgr->free_work);
			write_unlock_bh(&lgr->conns_lock);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	if (role == SMC_CLNT && !srv_first_contact &&
	    (local_contact == SMC_FIRST_CONTACT)) {
		/* Server reuses a link group, but Client wants to start
		 * a new one
		 * send out_of_sync decline, reason synchr. error
		 */
		return -ENOLINK;
	}

create:
	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_lgr_create(smc, is_smcd, smcibdev, ibport,
				    lcl->id_for_peer, vlan_id, smcd, peer_gid);
		if (rc)
			goto out;
		smc_lgr_register_conn(conn); /* add smc conn to lgr */
	}
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
	conn->urg_state = SMC_URG_READ;
	if (is_smcd) {
		conn->rx_off = sizeof(struct smcd_cdc_msg);
		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
	}
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif

out:
	return rc ? rc : local_contact;
}

/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size)
{
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	size = (size - 1) >> 14;
	compressed = ilog2(size) + 1;
	if (compressed >= SMC_RMBE_SIZES)
		compressed = SMC_RMBE_SIZES - 1;
	return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
	u32 size;

	size = 0x00000001 << (((int)compressed) + 14);
	return (int)size;
}

/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     rwlock_t *lock,
					     struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	read_lock_bh(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			read_unlock_bh(lock);
			return buf_slot;
		}
	}
	read_unlock_bh(lock);
	return NULL;
}

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}

static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
						bool is_rmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	struct smc_link *lnk;
	int rc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	buf_desc->order = get_order(bufsize);
	buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
				      __GFP_NOMEMALLOC | __GFP_COMP |
				      __GFP_NORETRY | __GFP_ZERO,
				      buf_desc->order);
	if (!buf_desc->pages) {
		kfree(buf_desc);
		return ERR_PTR(-EAGAIN);
	}
	buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);

	/* build the sg table from the pages */
	lnk = &lgr->lnk[SMC_SINGLE_LINK];
	rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
			    GFP_KERNEL);
	if (rc) {
		smc_buf_free(lgr, is_rmb, buf_desc);
		return ERR_PTR(rc);
	}
	sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
		   buf_desc->cpu_addr, bufsize);

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
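	/* on mapping problems return -EAGAIN rather than -ENOMEM so that
	 * __smc_buf_create() retries with the next smaller buffer size
	 * instead of giving up
	 */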
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != 1) {
		smc_buf_free(lgr, is_rmb, buf_desc);
		return ERR_PTR(-EAGAIN);
	}

	/* create a new memory region for the RMB */
	if (is_rmb) {
		rc = smc_ib_get_memory_region(lnk->roce_pd,
					      IB_ACCESS_REMOTE_WRITE |
					      IB_ACCESS_LOCAL_WRITE,
					      buf_desc);
		if (rc) {
			smc_buf_free(lgr, is_rmb, buf_desc);
			return ERR_PTR(rc);
		}
	}

	buf_desc->len = bufsize;
	return buf_desc;
}

#define SMCD_DMBE_SIZES		7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
						bool is_dmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	int rc;

	if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
		return ERR_PTR(-EAGAIN);

	/* try to alloc a new DMB */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);
	if (is_dmb) {
		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
		if (rc) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
		/* CDC header stored in buf. So, pretend it was smaller */
		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
	} else {
		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
					     __GFP_NOWARN | __GFP_NORETRY |
					     __GFP_NOMEMALLOC);
		if (!buf_desc->cpu_addr) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->len = bufsize;
	}
	return buf_desc;
}

static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_short;
	int sk_buf_size;
	rwlock_t *lock;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_sndbuf / 2;

	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
	     bufsize_short >= 0; bufsize_short--) {

		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_short];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_short];
		}
		bufsize = smc_uncompress_bufsize(bufsize_short);
		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
			continue;

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
		if (buf_desc) {
			memset(buf_desc->cpu_addr, 0, bufsize);
			break; /* found reusable slot */
		}

		if (is_smcd)
			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
		else
			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc))
			continue;

		buf_desc->used = 1;
		write_lock_bh(lock);
		list_add(&buf_desc->list, buf_list);
		write_unlock_bh(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return -ENOMEM;

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_short = bufsize_short;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit =
			smc_rmb_wnd_update_limit(buf_desc->len);
		if (is_smcd)
			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}

void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
			       conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
				  conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
			       conn->rmb_desc, DMA_FROM_DEVICE);
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
				  conn->rmb_desc, DMA_FROM_DEVICE);
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group)
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
	int rc;

	/* create send buffer */
	rc = __smc_buf_create(smc, is_smcd, false);
	if (rc)
		return rc;
	/* create rmb */
	rc = __smc_buf_create(smc, is_smcd, true);
	if (rc)
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
	return rc;
}

static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
		    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* already in list */
			return i;
		}
	}
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
	lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
	return i;
}

/* delete an rtoken */
int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
{
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
			lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;

			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}
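
/* The rtokens[] array of the link group is a small fixed-size table of peer
 * rkey/DMA-address pairs, one slot per possible connection
 * (SMC_RMBS_PER_LGR_MAX); smc_rtoken_add() returns the slot index which the
 * connection stores as rtoken_idx, see smc_rmb_rtoken_handling() below.
 */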

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
					  clc->rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_freeing_list);

	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(&smc_lgr_list.list))
		list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
	spin_unlock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
		list_del_init(&lgr->list);
		if (!lgr->is_smcd) {
			struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

			if (lnk->state == SMC_LNK_ACTIVE)
				smc_llc_send_delete_link(lnk, SMC_LLC_REQ,
							 false);
			smc_llc_link_inactive(lnk);
		}
		cancel_delayed_work_sync(&lgr->free_work);
		if (lgr->is_smcd)
			smc_ism_signal_shutdown(lgr);
		smc_lgr_free(lgr); /* free link group */
	}
}