// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Basic Transport Functions exploiting Infiniband API
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"

#define SMC_LGR_NUM_INCR		256
#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
#define SMC_LGR_FREE_DELAY_FAST		(8 * HZ)
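
/* With HZ jiffies per second these delays work out to 10 minutes for the
 * server side, 10 minutes plus 10 seconds for the client side and 8 seconds
 * for the fast variant.
 */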

static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.num = 0,
};

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	mod_delayed_work(system_wq, &lgr->free_work,
			 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
			 SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
}

void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
	mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST);
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @conn	connection to register
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}

/* Register a connection in its link group by assigning an alert token
 * and inserting it into the search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static void smc_lgr_register_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);

	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);
	conn->lgr->conns_num++;
}

/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	lgr->conns_num--;
	conn->alert_token_local = 0;
	conn->lgr = NULL;
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local) {
		__smc_lgr_unregister_conn(conn);
	}
	write_unlock_bh(&lgr->conns_lock);
}

/* Send delete link, either as client to request the initiation
 * of the DELETE LINK sequence from server; or as server to
 * initiate the delete processing. See smc_llc_rx_delete_link().
 */
static int smc_link_send_delete(struct smc_link *lnk)
{
	if (lnk->state == SMC_LNK_ACTIVE &&
	    !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true)) {
		smc_llc_link_deleting(lnk);
		return 0;
	}
	return -ENOTCONN;
}

static void smc_lgr_free(struct smc_link_group *lgr);
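
/* The delayed free work frees a link group only once it no longer holds any
 * connection. For SMC-R an active link first gets a DELETE LINK request and
 * the work is rescheduled, so the actual freeing happens once the peer has
 * responded or the delay expires again.
 */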
static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	bool conns;

	spin_lock_bh(&smc_lgr_list.lock);
	if (list_empty(&lgr->list))
		goto free;
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(&smc_lgr_list.lock);
		return;
	}
	list_del_init(&lgr->list); /* remove from smc_lgr_list */
free:
	spin_unlock_bh(&smc_lgr_list.lock);

	if (!lgr->is_smcd && !lgr->terminating) {
		struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

		/* try to send del link msg, on error free lgr immediately */
		if (lnk->state == SMC_LNK_ACTIVE &&
		    !smc_link_send_delete(lnk)) {
			/* reschedule in case we never receive a response */
			smc_lgr_schedule_free_work(lgr);
			return;
		}
	}

	if (!delayed_work_pending(&lgr->free_work)) {
		struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

		if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
			smc_llc_link_inactive(lnk);
		if (lgr->is_smcd)
			smc_ism_signal_shutdown(lgr);
		smc_lgr_free(lgr);
	}
}

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,
			  struct smc_ib_device *smcibdev, u8 ibport,
			  char *peer_systemid, unsigned short vlan_id,
			  struct smcd_dev *smcismdev, u64 peer_gid)
{
	struct smc_link_group *lgr;
	struct smc_link *lnk;
	u8 rndvec[3];
	int rc = 0;
	int i;

	if (is_smcd && vlan_id) {
		rc = smc_ism_get_vlan(smcismdev, vlan_id);
		if (rc)
			goto out;
	}

	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
		rc = -ENOMEM;
		goto out;
	}
	lgr->is_smcd = is_smcd;
	lgr->sync_err = 0;
	lgr->vlan_id = vlan_id;
	rwlock_init(&lgr->sndbufs_lock);
	rwlock_init(&lgr->rmbs_lock);
	rwlock_init(&lgr->conns_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
	smc_lgr_list.num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	lgr->conns_all = RB_ROOT;
	if (is_smcd) {
		/* SMC-D specific settings */
		lgr->peer_gid = peer_gid;
		lgr->smcd = smcismdev;
	} else {
		/* SMC-R specific settings */
		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
		memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);

		lnk = &lgr->lnk[SMC_SINGLE_LINK];
		/* initialize link */
		lnk->state = SMC_LNK_ACTIVATING;
		lnk->link_id = SMC_SINGLE_LINK;
		lnk->smcibdev = smcibdev;
		lnk->ibport = ibport;
		lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
		if (!smcibdev->initialized)
			smc_ib_setup_per_ibdev(smcibdev);
		get_random_bytes(rndvec, sizeof(rndvec));
		lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
				   (rndvec[2] << 16);
		rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
					  vlan_id, lnk->gid, &lnk->sgid_index);
		if (rc)
			goto free_lgr;
		rc = smc_llc_link_init(lnk);
		if (rc)
			goto free_lgr;
		rc = smc_wr_alloc_link_mem(lnk);
		if (rc)
			goto clear_llc_lnk;
		rc = smc_ib_create_protection_domain(lnk);
		if (rc)
			goto free_link_mem;
		rc = smc_ib_create_queue_pair(lnk);
		if (rc)
			goto dealloc_pd;
		rc = smc_wr_create_link(lnk);
		if (rc)
			goto destroy_qp;
	}
	smc->conn.lgr = lgr;
	spin_lock_bh(&smc_lgr_list.lock);
	list_add(&lgr->list, &smc_lgr_list.list);
	spin_unlock_bh(&smc_lgr_list.lock);
	return 0;

destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
clear_llc_lnk:
	smc_llc_link_clear(lnk);
free_lgr:
	kfree(lgr);
out:
	return rc;
}
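
/* A sndbuf slot is simply marked unused so a later connection can pick it up
 * again. An RMB is kept for reuse only if registering it with the peer had
 * succeeded (for SMC-R its rkey is deleted at the peer first); otherwise it
 * is unlinked from the link group and freed.
 */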
static void smc_buf_unuse(struct smc_connection *conn,
			  struct smc_link_group *lgr)
{
	if (conn->sndbuf_desc)
		conn->sndbuf_desc->used = 0;
	if (conn->rmb_desc) {
		if (!conn->rmb_desc->regerr) {
			conn->rmb_desc->used = 0;
			if (!lgr->is_smcd) {
				/* unregister rmb with peer */
				smc_llc_do_delete_rkey(
						&lgr->lnk[SMC_SINGLE_LINK],
						conn->rmb_desc);
			}
		} else {
			/* buf registration failed, reuse not possible */
			write_lock_bh(&lgr->rmbs_lock);
			list_del(&conn->rmb_desc->list);
			write_unlock_bh(&lgr->rmbs_lock);

			smc_buf_free(lgr, true, conn->rmb_desc);
		}
	}
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	if (lgr->is_smcd) {
		smc_ism_unset_conn(conn);
		tasklet_kill(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
	}
	smc_lgr_unregister_conn(conn);		/* unsets conn->lgr */
	smc_buf_unuse(conn, lgr);		/* allow buffer reuse */

	if (!lgr->conns_num)
		smc_lgr_schedule_free_work(lgr);
}

static void smc_link_clear(struct smc_link *lnk)
{
	lnk->peer_qpn = 0;
	smc_llc_link_clear(lnk);
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
}

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
			  struct smc_buf_desc *buf_desc)
{
	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

	if (is_rmb) {
		if (buf_desc->mr_rx[SMC_SINGLE_LINK])
			smc_ib_put_memory_region(
					buf_desc->mr_rx[SMC_SINGLE_LINK]);
		smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
				    DMA_FROM_DEVICE);
	} else {
		smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
				    DMA_TO_DEVICE);
	}
	sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
	if (buf_desc->pages)
		__free_pages(buf_desc->pages, buf_desc->order);
	kfree(buf_desc);
}

static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
			  struct smc_buf_desc *buf_desc)
{
	if (is_dmb) {
		/* restore original buf len */
		buf_desc->len += sizeof(struct smcd_cdc_msg);
		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
	} else {
		kfree(buf_desc->cpu_addr);
	}
	kfree(buf_desc);
}

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc)
{
	if (lgr->is_smcd)
		smcd_buf_free(lgr, is_rmb, buf_desc);
	else
		smcr_buf_free(lgr, is_rmb, buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf_desc;
	struct list_head *buf_list;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		if (is_rmb)
			buf_list = &lgr->rmbs[i];
		else
			buf_list = &lgr->sndbufs[i];
		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
					 list) {
			list_del(&buf_desc->list);
			smc_buf_free(lgr, is_rmb, buf_desc);
		}
	}
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	/* free send buffers */
	__smc_lgr_free_bufs(lgr, false);
	/* free rmbs */
	__smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
	smc_lgr_free_bufs(lgr);
	if (lgr->is_smcd)
		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
	else
		smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
	kfree(lgr);
}

void smc_lgr_forget(struct smc_link_group *lgr)
{
	spin_lock_bh(&smc_lgr_list.lock);
	/* do not use this link group for new connections */
	if (!list_empty(&lgr->list))
		list_del_init(&lgr->list);
	spin_unlock_bh(&smc_lgr_list.lock);
}

/* terminate link group abnormally */
static void __smc_lgr_terminate(struct smc_link_group *lgr)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	if (lgr->terminating)
		return;	/* lgr already terminating */
	lgr->terminating = 1;
	if (!list_empty(&lgr->list)) /* forget lgr */
		list_del_init(&lgr->list);
	if (!lgr->is_smcd)
		smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);

	write_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put in close work */
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
		__smc_lgr_unregister_conn(conn);
		write_unlock_bh(&lgr->conns_lock);
		if (!schedule_work(&conn->close_work))
			sock_put(&smc->sk);
		write_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	write_unlock_bh(&lgr->conns_lock);
	if (!lgr->is_smcd)
		wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
	smc_lgr_schedule_free_work(lgr);
}

void smc_lgr_terminate(struct smc_link_group *lgr)
{
	spin_lock_bh(&smc_lgr_list.lock);
	__smc_lgr_terminate(lgr);
	spin_unlock_bh(&smc_lgr_list.lock);
}

/* Called when IB port is terminated */
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *l;

	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
		if (!lgr->is_smcd &&
		    lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
		    lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
			__smc_lgr_terminate(lgr);
	}
	spin_unlock_bh(&smc_lgr_list.lock);
}

/* Called when SMC-D device is terminated or peer is lost */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);

	/* run common cleanup function and build free list */
	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
		if (lgr->is_smcd && lgr->smcd == dev &&
		    (!peer_gid || lgr->peer_gid == peer_gid) &&
		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
			__smc_lgr_terminate(lgr);
			list_move(&lgr->list, &lgr_free_list);
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	/* cancel the regular free workers and actually free lgrs */
	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		cancel_delayed_work_sync(&lgr->free_work);
		if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
			smc_ism_signal_shutdown(lgr);
		smc_lgr_free(lgr);
	}
}

/* Determine vlan of internal TCP socket.
 * @vlan_id: address to store the determined vlan id into
 */
int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct net_device *ndev;
	int i, nest_lvl, rc = 0;

	*vlan_id = 0;
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	ndev = dst->dev;
	if (is_vlan_dev(ndev)) {
		*vlan_id = vlan_dev_vlan_id(ndev);
		goto out_rel;
	}

	rtnl_lock();
	nest_lvl = dev_get_nest_level(ndev);
	for (i = 0; i < nest_lvl; i++) {
		struct list_head *lower = &ndev->adj_list.lower;

		if (list_empty(lower))
			break;
		lower = lower->next;
		ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
		if (is_vlan_dev(ndev)) {
			*vlan_id = vlan_dev_vlan_id(ndev);
			break;
		}
	}
	rtnl_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

static bool smcr_lgr_match(struct smc_link_group *lgr,
			   struct smc_clc_msg_local *lcl,
			   enum smc_lgr_role role, u32 clcqpn)
{
	return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
		       SMC_SYSTEMID_LEN) &&
		!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
			SMC_GID_SIZE) &&
		!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
			sizeof(lcl->mac)) &&
		lgr->role == role &&
		(lgr->role == SMC_SERV ||
		 lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
}

static bool smcd_lgr_match(struct smc_link_group *lgr,
			   struct smcd_dev *smcismdev, u64 peer_gid)
{
	return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
		    struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
		    struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
		    u64 peer_gid)
{
	struct smc_connection *conn = &smc->conn;
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_link_group *lgr;
	unsigned short vlan_id;
	enum smc_lgr_role role;
	int rc = 0;

	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id);
	if (rc)
		return rc;

	if ((role == SMC_CLNT) && srv_first_contact)
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry(lgr, &smc_lgr_list.list, list) {
		write_lock_bh(&lgr->conns_lock);
		if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) :
		     smcr_lgr_match(lgr, lcl, role, clcqpn)) &&
		    !lgr->sync_err &&
		    lgr->vlan_id == vlan_id &&
		    (role == SMC_CLNT ||
		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
			/* link group found */
			local_contact = SMC_REUSE_CONTACT;
			conn->lgr = lgr;
			smc_lgr_register_conn(conn); /* add smc conn to lgr */
			write_unlock_bh(&lgr->conns_lock);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	if (role == SMC_CLNT && !srv_first_contact &&
	    (local_contact == SMC_FIRST_CONTACT)) {
		/* the server reuses a link group, but the client wants to
		 * start a new one; send an out_of_sync decline due to the
		 * synchronization error
		 */
		return -ENOLINK;
	}

create:
	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_lgr_create(smc, is_smcd, smcibdev, ibport,
				    lcl->id_for_peer, vlan_id, smcd, peer_gid);
		if (rc)
			goto out;
		smc_lgr_register_conn(conn); /* add smc conn to lgr */
	}
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
	conn->urg_state = SMC_URG_READ;
	if (is_smcd) {
		conn->rx_off = sizeof(struct smcd_cdc_msg);
		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
	}
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif

out:
	return rc ? rc : local_contact;
}

/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size)
{
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	size = (size - 1) >> 14;
	compressed = ilog2(size) + 1;
	if (compressed >= SMC_RMBE_SIZES)
		compressed = SMC_RMBE_SIZES - 1;
	return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
	u32 size;

	size = 0x00000001 << (((int)compressed) + 14);
	return (int)size;
}
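
/* Worked example of the compressed notation (with the 16K minimum mentioned
 * above): smc_compress_bufsize(16384) == 0 and smc_uncompress_bufsize(0) ==
 * 16384; smc_compress_bufsize(65536) == 2 maps back to exactly 65536; an odd
 * request such as 20000 bytes compresses to 1 and thus uncompresses to the
 * next larger supported size, 32768.
 */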

/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     rwlock_t *lock,
					     struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	read_lock_bh(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			read_unlock_bh(lock);
			return buf_slot;
		}
	}
	read_unlock_bh(lock);
	return NULL;
}

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}

static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
						bool is_rmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	struct smc_link *lnk;
	int rc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	buf_desc->order = get_order(bufsize);
	buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
				      __GFP_NOMEMALLOC | __GFP_COMP |
				      __GFP_NORETRY | __GFP_ZERO,
				      buf_desc->order);
	if (!buf_desc->pages) {
		kfree(buf_desc);
		return ERR_PTR(-EAGAIN);
	}
	buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);

	/* build the sg table from the pages */
	lnk = &lgr->lnk[SMC_SINGLE_LINK];
	rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
			    GFP_KERNEL);
	if (rc) {
		smc_buf_free(lgr, is_rmb, buf_desc);
		return ERR_PTR(rc);
	}
	sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
		   buf_desc->cpu_addr, bufsize);

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != 1) {
		smc_buf_free(lgr, is_rmb, buf_desc);
		return ERR_PTR(-EAGAIN);
	}

	/* create a new memory region for the RMB */
	if (is_rmb) {
		rc = smc_ib_get_memory_region(lnk->roce_pd,
					      IB_ACCESS_REMOTE_WRITE |
					      IB_ACCESS_LOCAL_WRITE,
					      buf_desc);
		if (rc) {
			smc_buf_free(lgr, is_rmb, buf_desc);
			return ERR_PTR(rc);
		}
	}

	buf_desc->len = bufsize;
	return buf_desc;
}

#define SMCD_DMBE_SIZES		7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
						bool is_dmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	int rc;

	if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
		return ERR_PTR(-EAGAIN);

	/* try to alloc a new DMB */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);
	if (is_dmb) {
		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
		if (rc) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
		/* CDC header stored in buf. So, pretend it was smaller */
		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
	} else {
		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
					     __GFP_NOWARN | __GFP_NORETRY |
					     __GFP_NOMEMALLOC);
		if (!buf_desc->cpu_addr) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->len = bufsize;
	}
	return buf_desc;
}
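
/* Buffer allocation strategy: start from half of the socket's send or
 * receive buffer size, walk down through the compressed sizes and take the
 * first size for which either a reusable slot exists in the link group or a
 * fresh buffer can be allocated; only a hard -ENOMEM ends the search early.
 */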
static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_short;
	int sk_buf_size;
	rwlock_t *lock;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_sndbuf / 2;

	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
	     bufsize_short >= 0; bufsize_short--) {

		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_short];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_short];
		}
		bufsize = smc_uncompress_bufsize(bufsize_short);
		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
			continue;

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
		if (buf_desc) {
			memset(buf_desc->cpu_addr, 0, bufsize);
			break; /* found reusable slot */
		}

		if (is_smcd)
			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
		else
			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc))
			continue;

		buf_desc->used = 1;
		write_lock_bh(lock);
		list_add(&buf_desc->list, buf_list);
		write_unlock_bh(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return -ENOMEM;

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_short = bufsize_short;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit =
			smc_rmb_wnd_update_limit(buf_desc->len);
		if (is_smcd)
			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}
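
/* The sync_sg helpers below are needed for SMC-R only; the early return on
 * is_smcd makes them a no-op for SMC-D connections.
 */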
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
			       conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
				  conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
			       conn->rmb_desc, DMA_FROM_DEVICE);
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
				  conn->rmb_desc, DMA_FROM_DEVICE);
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group)
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
	int rc;

	/* create send buffer */
	rc = __smc_buf_create(smc, is_smcd, false);
	if (rc)
		return rc;
	/* create rmb */
	rc = __smc_buf_create(smc, is_smcd, true);
	if (rc)
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
	return rc;
}

static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
		    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* already in list */
			return i;
		}
	}
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
	lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
	return i;
}

/* delete an rtoken */
int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
{
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
			lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;

			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}
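
/* An rtoken pairs the peer's rkey with the DMA address of the peer RMB.
 * smc_rtoken_add() returns the index of an already registered pair instead
 * of adding a duplicate, so the index stored below in conn->rtoken_idx
 * always refers to exactly one table entry.
 */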

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
					  clc->rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_freeing_list);

	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(&smc_lgr_list.list))
		list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
	spin_unlock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
		list_del_init(&lgr->list);
		if (!lgr->is_smcd) {
			struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

			if (lnk->state == SMC_LNK_ACTIVE)
				smc_llc_send_delete_link(lnk, SMC_LLC_REQ,
							 false);
			smc_llc_link_inactive(lnk);
		}
		cancel_delayed_work_sync(&lgr->free_work);
		if (lgr->is_smcd)
			smc_ism_signal_shutdown(lgr);
		smc_lgr_free(lgr);	/* free link group */
	}
}