// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Basic Transport Functions exploiting Infiniband API
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
#include <linux/mutex.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"

#define SMC_LGR_NUM_INCR		256
#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
#define SMC_LGR_FREE_DELAY_FAST		(8 * HZ)

static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.num = 0,
};

static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);

struct smc_ib_up_work {
	struct work_struct	work;
	struct smc_link_group	*lgr;
	struct smc_ib_device	*smcibdev;
	u8			ibport;
};

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);

static void smc_link_up_work(struct work_struct *work);
static void smc_link_down_work(struct work_struct *work);

/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
						  spinlock_t **lgr_lock)
{
	if (lgr->is_smcd) {
		*lgr_lock = &lgr->smcd->lgr_lock;
		return &lgr->smcd->lgr_list;
	}

	*lgr_lock = &smc_lgr_list.lock;
	return &smc_lgr_list.list;
}

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	if (!lgr->freeing && !lgr->freefast) {
		mod_delayed_work(system_wq, &lgr->free_work,
				 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
						SMC_LGR_FREE_DELAY_CLNT :
						SMC_LGR_FREE_DELAY_SERV);
	}
}

void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
	if (!lgr->freeing && !lgr->freefast) {
		lgr->freefast = 1;
		mod_delayed_work(system_wq, &lgr->free_work,
				 SMC_LGR_FREE_DELAY_FAST);
	}
}

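/* With the delay constants above an unused server link group lingers for
 * roughly ten minutes (600 * HZ jiffies) before it is freed, a client link
 * group for about ten seconds longer (so the client side never frees before
 * the server side does), and the "fast" variant used when a freshly created
 * link group must be discarded early waits only about eight seconds.
 */
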
/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @smc		connection to register
 * Returns 0 on success, != otherwise.
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}

/* assign an SMC-R link to the connection */
static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
{
	enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
				       SMC_LNK_ACTIVE;
	int i, j;

	/* do link balancing */
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &conn->lgr->lnk[i];

		if (lnk->state != expected || lnk->link_is_asym)
			continue;
		if (conn->lgr->role == SMC_CLNT) {
			conn->lnk = lnk; /* temporary, SMC server assigns link*/
			break;
		}
		if (conn->lgr->conns_num % 2) {
			for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
				struct smc_link *lnk2;

				lnk2 = &conn->lgr->lnk[j];
				if (lnk2->state == expected &&
				    !lnk2->link_is_asym) {
					conn->lnk = lnk2;
					break;
				}
			}
		}
		if (!conn->lnk)
			conn->lnk = lnk;
		break;
	}
	if (!conn->lnk)
		return SMC_CLC_DECL_NOACTLINK;
	return 0;
}

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);
	int rc;

	if (!conn->lgr->is_smcd) {
		rc = smcr_lgr_conn_assign_link(conn, first);
		if (rc)
			return rc;
	}
	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);
	conn->lgr->conns_num++;
	return 0;
}

/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	lgr->conns_num--;
	conn->alert_token_local = 0;
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local) {
		__smc_lgr_unregister_conn(conn);
	}
	write_unlock_bh(&lgr->conns_lock);
	conn->lgr = NULL;
}

void smc_lgr_cleanup_early(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *lgr_list;
	spinlock_t *lgr_lock;

	if (!lgr)
		return;

	smc_conn_free(conn);
	lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	/* do not use this link group for new connections */
	if (!list_empty(lgr_list))
		list_del_init(lgr_list);
	spin_unlock_bh(lgr_lock);
	smc_lgr_schedule_free_work_fast(lgr);
}

static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (smc_link_usable(lnk))
			lnk->state = SMC_LNK_INACTIVE;
	}
	wake_up_all(&lgr->llc_msg_waiter);
	wake_up_all(&lgr->llc_flow_waiter);
}

static void smc_lgr_free(struct smc_link_group *lgr);

static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	spinlock_t *lgr_lock;
	bool conns;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;
	}
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(lgr_lock);
		return;
	}
	list_del_init(&lgr->list); /* remove from smc_lgr_list */
	lgr->freeing = 1; /* this instance does the freeing, no new schedule */
	spin_unlock_bh(lgr_lock);
	cancel_delayed_work(&lgr->free_work);

	if (!lgr->is_smcd && !lgr->terminating)
		smc_llc_send_link_delete_all(lgr, true,
					     SMC_LLC_DEL_PROG_INIT_TERM);
	if (lgr->is_smcd && !lgr->terminating)
		smc_ism_signal_shutdown(lgr);
	if (!lgr->is_smcd)
		smcr_lgr_link_deactivate_all(lgr);
	smc_lgr_free(lgr);
}

static void smc_lgr_terminate_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  terminate_work);

	__smc_lgr_terminate(lgr, true);
}

/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
	u8 link_id;
	int i;

	while (1) {
again:
		link_id = ++lgr->next_link_id;
		if (!link_id)	/* skip zero as link_id */
			link_id = ++lgr->next_link_id;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (smc_link_usable(&lgr->lnk[i]) &&
			    lgr->lnk[i].link_id == link_id)
				goto again;	/* link_id already in use */
		}
		break;
	}
	return link_id;
}

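/* Link ids are a single byte, so at most 255 usable values exist (zero is
 * reserved). smcr_next_link_id() above simply wraps lgr->next_link_id and
 * retries until it finds a value that no currently usable link of this link
 * group carries; with only SMC_LINKS_PER_LGR_MAX links per group a free id
 * is always found eventually.
 */
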
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
		   u8 link_idx, struct smc_init_info *ini)
{
	u8 rndvec[3];
	int rc;

	get_device(&ini->ib_dev->ibdev->dev);
	atomic_inc(&ini->ib_dev->lnk_cnt);
	lnk->state = SMC_LNK_ACTIVATING;
	lnk->link_id = smcr_next_link_id(lgr);
	lnk->lgr = lgr;
	lnk->link_idx = link_idx;
	lnk->smcibdev = ini->ib_dev;
	lnk->ibport = ini->ib_port;
	lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
	smc_llc_link_set_uid(lnk);
	INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
	if (!ini->ib_dev->initialized) {
		rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
		if (rc)
			goto out;
	}
	get_random_bytes(rndvec, sizeof(rndvec));
	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
		(rndvec[2] << 16);
	rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
				  ini->vlan_id, lnk->gid, &lnk->sgid_index);
	if (rc)
		goto out;
	rc = smc_llc_link_init(lnk);
	if (rc)
		goto out;
	rc = smc_wr_alloc_link_mem(lnk);
	if (rc)
		goto clear_llc_lnk;
	rc = smc_ib_create_protection_domain(lnk);
	if (rc)
		goto free_link_mem;
	rc = smc_ib_create_queue_pair(lnk);
	if (rc)
		goto dealloc_pd;
	rc = smc_wr_create_link(lnk);
	if (rc)
		goto destroy_qp;
	return 0;

destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
clear_llc_lnk:
	smc_llc_link_clear(lnk, false);
out:
	put_device(&ini->ib_dev->ibdev->dev);
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
		wake_up(&ini->ib_dev->lnks_deleted);
	return rc;
}

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_link_group *lgr;
	struct list_head *lgr_list;
	struct smc_link *lnk;
	spinlock_t *lgr_lock;
	u8 link_idx;
	int rc = 0;
	int i;

	if (ini->is_smcd && ini->vlan_id) {
		if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
			rc = SMC_CLC_DECL_ISMVLANERR;
			goto out;
		}
	}

	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
		rc = SMC_CLC_DECL_MEM;
		goto ism_put_vlan;
	}
	lgr->is_smcd = ini->is_smcd;
	lgr->sync_err = 0;
	lgr->terminating = 0;
	lgr->freefast = 0;
	lgr->freeing = 0;
	lgr->vlan_id = ini->vlan_id;
	mutex_init(&lgr->sndbufs_lock);
	mutex_init(&lgr->rmbs_lock);
	rwlock_init(&lgr->conns_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
	lgr->next_link_id = 0;
	smc_lgr_list.num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
	lgr->conns_all = RB_ROOT;
	if (ini->is_smcd) {
		/* SMC-D specific settings */
		get_device(&ini->ism_dev->dev);
		lgr->peer_gid = ini->ism_gid;
		lgr->smcd = ini->ism_dev;
		lgr_list = &ini->ism_dev->lgr_list;
		lgr_lock = &lgr->smcd->lgr_lock;
		lgr->peer_shutdown = 0;
		atomic_inc(&ini->ism_dev->lgr_cnt);
	} else {
		/* SMC-R specific settings */
		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
		       SMC_SYSTEMID_LEN);
		memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
		       SMC_MAX_PNETID_LEN);
		smc_llc_lgr_init(lgr, smc);

		link_idx = SMC_SINGLE_LINK;
		lnk = &lgr->lnk[link_idx];
		rc = smcr_link_init(lgr, lnk, link_idx, ini);
		if (rc)
			goto free_lgr;
		lgr_list = &smc_lgr_list.list;
		lgr_lock = &smc_lgr_list.lock;
		atomic_inc(&lgr_cnt);
	}
	smc->conn.lgr = lgr;
	spin_lock_bh(lgr_lock);
	list_add(&lgr->list, lgr_list);
	spin_unlock_bh(lgr_lock);
	return 0;

free_lgr:
	kfree(lgr);
ism_put_vlan:
	if (ini->is_smcd && ini->vlan_id)
		smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
out:
	if (rc < 0) {
		if (rc == -ENOMEM)
			rc = SMC_CLC_DECL_MEM;
		else
			rc = SMC_CLC_DECL_INTERR;
	}
	return rc;
}

static int smc_write_space(struct smc_connection *conn)
{
	int buffer_len = conn->peer_rmbe_size;
	union smc_host_cursor prod;
	union smc_host_cursor cons;
	int space;

	smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
	/* determine rx_buf space */
	space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
	return space;
}

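/* Illustration of the space calculation above (example values only, assuming
 * smc_curs_diff() yields how far the producer cursor is ahead of the consumer
 * cursor, modulo the buffer size): with peer_rmbe_size = 65536, cons = 1000
 * and prod = 5000, 4000 bytes are still in flight and smc_write_space()
 * returns 61536 writable bytes.
 */
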
static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
			     struct smc_wr_buf *wr_buf)
{
	struct smc_connection *conn = &smc->conn;
	union smc_host_cursor cons, fin;
	int rc = 0;
	int diff;

	smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
	smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
	/* set prod cursor to old state, enforce tx_rdma_writes() */
	smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);

	if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
		/* cons cursor advanced more than fin, and prod was set
		 * fin above, so now prod is smaller than cons. Fix that.
		 */
		diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_sent, diff);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_fin, diff);

		smp_mb__before_atomic();
		atomic_add(diff, &conn->sndbuf_space);
		smp_mb__after_atomic();

		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl.prod, diff);
		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl_fin, diff);
	}
	/* recalculate, value is used by tx_rdma_writes() */
	atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));

	if (smc->sk.sk_state != SMC_INIT &&
	    smc->sk.sk_state != SMC_CLOSED) {
		rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
		if (!rc) {
			schedule_delayed_work(&conn->tx_work, 0);
			smc->sk.sk_data_ready(&smc->sk);
		}
	} else {
		smc_wr_tx_put_slot(conn->lnk,
				   (struct smc_wr_tx_pend_priv *)pend);
	}
	return rc;
}

struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
				  struct smc_link *from_lnk, bool is_dev_err)
{
	struct smc_link *to_lnk = NULL;
	struct smc_cdc_tx_pend *pend;
	struct smc_connection *conn;
	struct smc_wr_buf *wr_buf;
	struct smc_sock *smc;
	struct rb_node *node;
	int i, rc = 0;

	/* link is inactive, wake up tx waiters */
	smc_wr_wakeup_tx_wait(from_lnk);

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].state != SMC_LNK_ACTIVE ||
		    i == from_lnk->link_idx)
			continue;
		if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
		    from_lnk->ibport == lgr->lnk[i].ibport) {
			continue;
		}
		to_lnk = &lgr->lnk[i];
		break;
	}
	if (!to_lnk) {
		smc_lgr_terminate_sched(lgr);
		return NULL;
	}
again:
	read_lock_bh(&lgr->conns_lock);
	for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
		conn = rb_entry(node, struct smc_connection, alert_node);
		if (conn->lnk != from_lnk)
			continue;
		smc = container_of(conn, struct smc_sock, conn);
		/* conn->lnk not yet set in SMC_INIT state */
		if (smc->sk.sk_state == SMC_INIT)
			continue;
		if (smc->sk.sk_state == SMC_CLOSED ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_PEERABORTWAIT ||
		    smc->sk.sk_state == SMC_PROCESSABORT) {
			spin_lock_bh(&conn->send_lock);
			conn->lnk = to_lnk;
			spin_unlock_bh(&conn->send_lock);
			continue;
		}
		sock_hold(&smc->sk);
		read_unlock_bh(&lgr->conns_lock);
		/* pre-fetch buffer outside of send_lock, might sleep */
		rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
		if (rc) {
			smcr_link_down_cond_sched(to_lnk);
			return NULL;
		}
		/* avoid race with smcr_tx_sndbuf_nonempty() */
		spin_lock_bh(&conn->send_lock);
		conn->lnk = to_lnk;
		rc = smc_switch_cursor(smc, pend, wr_buf);
		spin_unlock_bh(&conn->send_lock);
		sock_put(&smc->sk);
		if (rc) {
			smcr_link_down_cond_sched(to_lnk);
			return NULL;
		}
		goto again;
	}
	read_unlock_bh(&lgr->conns_lock);
	return to_lnk;
}

static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
			   struct smc_link_group *lgr)
{
	int rc;

	if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
		/* unregister rmb with peer */
		rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
		if (!rc) {
			/* protect against smc_llc_cli_rkey_exchange() */
			mutex_lock(&lgr->llc_conf_mutex);
			smc_llc_do_delete_rkey(lgr, rmb_desc);
			rmb_desc->is_conf_rkey = false;
			mutex_unlock(&lgr->llc_conf_mutex);
			smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
		}
	}

	if (rmb_desc->is_reg_err) {
		/* buf registration failed, reuse not possible */
		mutex_lock(&lgr->rmbs_lock);
		list_del(&rmb_desc->list);
		mutex_unlock(&lgr->rmbs_lock);

		smc_buf_free(lgr, true, rmb_desc);
	} else {
		rmb_desc->used = 0;
	}
}

static void smc_buf_unuse(struct smc_connection *conn,
			  struct smc_link_group *lgr)
{
	if (conn->sndbuf_desc)
		conn->sndbuf_desc->used = 0;
	if (conn->rmb_desc && lgr->is_smcd)
		conn->rmb_desc->used = 0;
	else if (conn->rmb_desc)
		smcr_buf_unuse(conn->rmb_desc, lgr);
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	if (lgr->is_smcd) {
		if (!list_empty(&lgr->list))
			smc_ism_unset_conn(conn);
		tasklet_kill(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
		if (current_work() != &conn->abort_work)
			cancel_work_sync(&conn->abort_work);
	}
	if (!list_empty(&lgr->list)) {
		smc_lgr_unregister_conn(conn);
		smc_buf_unuse(conn, lgr); /* allow buffer reuse */
	}

	if (!lgr->conns_num)
		smc_lgr_schedule_free_work(lgr);
}

/* unregister a link from a buf_desc */
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
				struct smc_link *lnk)
{
	if (is_rmb)
		buf_desc->is_reg_mr[lnk->link_idx] = false;
	if (!buf_desc->is_map_ib[lnk->link_idx])
		return;
	if (is_rmb) {
		if (buf_desc->mr_rx[lnk->link_idx]) {
			smc_ib_put_memory_region(
					buf_desc->mr_rx[lnk->link_idx]);
			buf_desc->mr_rx[lnk->link_idx] = NULL;
		}
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
	} else {
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
	}
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	buf_desc->is_map_ib[lnk->link_idx] = false;
}

/* unmap all buffers of lgr for a deleted link */
static void smcr_buf_unmap_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		mutex_lock(&lgr->rmbs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
			smcr_buf_unmap_link(buf_desc, true, lnk);
		mutex_unlock(&lgr->rmbs_lock);
		mutex_lock(&lgr->sndbufs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
					 list)
			smcr_buf_unmap_link(buf_desc, false, lnk);
		mutex_unlock(&lgr->sndbufs_lock);
	}
}

static void smcr_rtoken_clear_link(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		lgr->rtokens[i][lnk->link_idx].rkey = 0;
		lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
	}
}

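/* smcr_link_clear() below undoes what smcr_link_init() set up: it clears the
 * LLC state of the link, unmaps all buffers and rtokens that referenced it,
 * and then tears down the IB resources (QP, PD, work request memory) before
 * dropping the device reference taken when the link was created.
 */
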
/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_clear(struct smc_link *lnk, bool log)
{
	struct smc_ib_device *smcibdev;

	if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
		return;
	lnk->peer_qpn = 0;
	smc_llc_link_clear(lnk, log);
	smcr_buf_unmap_lgr(lnk);
	smcr_rtoken_clear_link(lnk);
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
	put_device(&lnk->smcibdev->ibdev->dev);
	smcibdev = lnk->smcibdev;
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&smcibdev->lnk_cnt))
		wake_up(&smcibdev->lnks_deleted);
}

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
			  struct smc_buf_desc *buf_desc)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);

	if (buf_desc->pages)
		__free_pages(buf_desc->pages, buf_desc->order);
	kfree(buf_desc);
}

static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
			  struct smc_buf_desc *buf_desc)
{
	if (is_dmb) {
		/* restore original buf len */
		buf_desc->len += sizeof(struct smcd_cdc_msg);
		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
	} else {
		kfree(buf_desc->cpu_addr);
	}
	kfree(buf_desc);
}

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc)
{
	if (lgr->is_smcd)
		smcd_buf_free(lgr, is_rmb, buf_desc);
	else
		smcr_buf_free(lgr, is_rmb, buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf_desc;
	struct list_head *buf_list;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		if (is_rmb)
			buf_list = &lgr->rmbs[i];
		else
			buf_list = &lgr->sndbufs[i];
		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
					 list) {
			list_del(&buf_desc->list);
			smc_buf_free(lgr, is_rmb, buf_desc);
		}
	}
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	/* free send buffers */
	__smc_lgr_free_bufs(lgr, false);
	/* free rmbs */
	__smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
	int i;

	if (!lgr->is_smcd) {
		mutex_lock(&lgr->llc_conf_mutex);
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
				smcr_link_clear(&lgr->lnk[i], false);
		}
		mutex_unlock(&lgr->llc_conf_mutex);
		smc_llc_lgr_clear(lgr);
	}

	smc_lgr_free_bufs(lgr);
	if (lgr->is_smcd) {
		if (!lgr->terminating) {
			smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
			put_device(&lgr->smcd->dev);
		}
		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
			wake_up(&lgr->smcd->lgrs_deleted);
	} else {
		if (!atomic_dec_return(&lgr_cnt))
			wake_up(&lgrs_deleted);
	}
	kfree(lgr);
}

static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		struct smc_buf_desc *buf_desc;

		list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
			buf_desc->len += sizeof(struct smcd_cdc_msg);
			smc_ism_unregister_dmb(lgr->smcd, buf_desc);
		}
	}
}

static void smc_sk_wake_ups(struct smc_sock *smc)
{
	smc->sk.sk_write_space(&smc->sk);
	smc->sk.sk_data_ready(&smc->sk);
	smc->sk.sk_state_change(&smc->sk);
}

/* kill a connection */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		smc_close_abort(conn);
	conn->killed = 1;
	smc->sk.sk_err = ECONNABORTED;
	smc_sk_wake_ups(smc);
	if (conn->lgr->is_smcd) {
		smc_ism_unset_conn(conn);
		if (soft)
			tasklet_kill(&conn->rx_tsklet);
		else
			tasklet_unlock_wait(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
	}
	smc_lgr_unregister_conn(conn);
	smc_close_active_abort(smc);
}

static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
	if (lgr->is_smcd) {
		smc_ism_signal_shutdown(lgr);
		smcd_unregister_all_dmbs(lgr);
		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
		put_device(&lgr->smcd->dev);
	} else {
		u32 rsn = lgr->llc_termination_rsn;

		if (!rsn)
			rsn = SMC_LLC_DEL_PROG_INIT_TERM;
		smc_llc_send_link_delete_all(lgr, false, rsn);
		smcr_lgr_link_deactivate_all(lgr);
	}
}

/* terminate link group
 * @soft: true if link group shutdown can take its time
 *	  false if immediate link group shutdown is required
 */
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	if (lgr->terminating)
		return;	/* lgr already terminating */
	/* cancel free_work sync, will terminate when lgr->freeing is set */
	cancel_delayed_work_sync(&lgr->free_work);
	lgr->terminating = 1;

	/* kill remaining link group connections */
	read_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		read_unlock_bh(&lgr->conns_lock);
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put below */
		lock_sock(&smc->sk);
		smc_conn_kill(conn, soft);
		release_sock(&smc->sk);
		sock_put(&smc->sk); /* sock_hold above */
		read_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	read_unlock_bh(&lgr->conns_lock);
	smc_lgr_cleanup(lgr);
	smc_lgr_free(lgr);
}

/* unlink link group and schedule termination */
void smc_lgr_terminate_sched(struct smc_link_group *lgr)
{
	spinlock_t *lgr_lock;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;	/* lgr already terminating */
	}
	list_del_init(&lgr->list);
	lgr->freeing = 1;
	spin_unlock_bh(lgr_lock);
	schedule_work(&lgr->terminate_work);
}

/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);

	/* run common cleanup function and build free list */
	spin_lock_bh(&dev->lgr_lock);
	list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
		if ((!peer_gid || lgr->peer_gid == peer_gid) &&
		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
			if (peer_gid) /* peer triggered termination */
				lgr->peer_shutdown = 1;
			list_move(&lgr->list, &lgr_free_list);
			lgr->freeing = 1;
		}
	}
	spin_unlock_bh(&dev->lgr_lock);

	/* cancel the regular free workers and actually free lgrs */
	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		schedule_work(&lgr->terminate_work);
	}
}

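/* In smc_smcd_terminate() above a peer_gid of zero acts as a wildcard that
 * matches every peer of the given device, and a vlan value of VLAN_VID_MASK
 * matches link groups on any VLAN; callers use these wildcards to tear down
 * all link groups of an SMC-D device at once.
 */
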
/* Called when an SMCD device is removed or the smc module is unloaded */
void smc_smcd_terminate_all(struct smcd_dev *smcd)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);

	spin_lock_bh(&smcd->lgr_lock);
	list_splice_init(&smcd->lgr_list, &lgr_free_list);
	list_for_each_entry(lgr, &lgr_free_list, list)
		lgr->freeing = 1;
	spin_unlock_bh(&smcd->lgr_lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		__smc_lgr_terminate(lgr, false);
	}

	if (atomic_read(&smcd->lgr_cnt))
		wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
}

/* Called when an SMCR device is removed or the smc module is unloaded.
 * If smcibdev is given, all SMCR link groups using this device are terminated.
 * If smcibdev is NULL, all SMCR link groups are terminated.
 */
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);
	int i;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!smcibdev) {
		list_splice_init(&smc_lgr_list.list, &lgr_free_list);
		list_for_each_entry(lgr, &lgr_free_list, list)
			lgr->freeing = 1;
	} else {
		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
				if (lgr->lnk[i].smcibdev == smcibdev)
					smcr_link_down_cond_sched(&lgr->lnk[i]);
			}
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
		__smc_lgr_terminate(lgr, false);
	}

	if (smcibdev) {
		if (atomic_read(&smcibdev->lnk_cnt))
			wait_event(smcibdev->lnks_deleted,
				   !atomic_read(&smcibdev->lnk_cnt));
	} else {
		if (atomic_read(&lgr_cnt))
			wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
	}
}

/* set new lgr type and clear all asymmetric link tagging */
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
{
	char *lgr_type = "";
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		if (smc_link_usable(&lgr->lnk[i]))
			lgr->lnk[i].link_is_asym = false;
	if (lgr->type == new_type)
		return;
	lgr->type = new_type;

	switch (lgr->type) {
	case SMC_LGR_NONE:
		lgr_type = "NONE";
		break;
	case SMC_LGR_SINGLE:
		lgr_type = "SINGLE";
		break;
	case SMC_LGR_SYMMETRIC:
		lgr_type = "SYMMETRIC";
		break;
	case SMC_LGR_ASYMMETRIC_PEER:
		lgr_type = "ASYMMETRIC_PEER";
		break;
	case SMC_LGR_ASYMMETRIC_LOCAL:
		lgr_type = "ASYMMETRIC_LOCAL";
		break;
	}
	pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
			    "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
			    lgr_type, lgr->pnet_id);
}

/* set new lgr type and tag a link as asymmetric */
void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
			    enum smc_lgr_type new_type, int asym_lnk_idx)
{
	smcr_lgr_set_type(lgr, new_type);
	lgr->lnk[asym_lnk_idx].link_is_asym = true;
}

/* abort connection, abort_work scheduled from tasklet context */
static void smc_conn_abort_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   abort_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	smc_conn_kill(conn, true);
	sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}

/* link is up - establish alternate link if applicable */
static void smcr_link_up(struct smc_link_group *lgr,
			 struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link *link = NULL;

	if (list_empty(&lgr->list) ||
	    lgr->type == SMC_LGR_SYMMETRIC ||
	    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
		return;

	if (lgr->role == SMC_SERV) {
		/* trigger local add link processing */
		link = smc_llc_usable_link(lgr);
		if (!link)
			return;
		smc_llc_srv_add_link_local(link);
	} else {
		/* invite server to start add link processing */
		u8 gid[SMC_GID_SIZE];

		if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
					 NULL))
			return;
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* some other llc task is ongoing */
			wait_event_timeout(lgr->llc_flow_waiter,
				(list_empty(&lgr->list) ||
				 lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
				SMC_LLC_WAIT_TIME);
		}
		/* lgr or device no longer active? */
		if (!list_empty(&lgr->list) &&
		    smc_ib_port_active(smcibdev, ibport))
			link = smc_llc_usable_link(lgr);
		if (link)
			smc_llc_send_add_link(link, smcibdev->mac[ibport - 1],
					      gid, NULL, SMC_LLC_REQ);
		wake_up(&lgr->llc_flow_waiter);	/* wake up next waiter */
	}
}

void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_ib_up_work *ib_work;
	struct smc_link_group *lgr, *n;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN) ||
		    lgr->type == SMC_LGR_SYMMETRIC ||
		    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
			continue;
		ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
		if (!ib_work)
			continue;
		INIT_WORK(&ib_work->work, smc_link_up_work);
		ib_work->lgr = lgr;
		ib_work->smcibdev = smcibdev;
		ib_work->ibport = ibport;
		schedule_work(&ib_work->work);
	}
}

/* link is down - switch connections to alternate link,
 * must be called under lgr->llc_conf_mutex lock
 */
static void smcr_link_down(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_link *to_lnk;
	int del_link_id;

	if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
		return;

	smc_ib_modify_qp_reset(lnk);
	to_lnk = smc_switch_conns(lgr, lnk, true);
	if (!to_lnk) { /* no backup link available */
		smcr_link_clear(lnk, true);
		return;
	}
	smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
	del_link_id = lnk->link_id;

	if (lgr->role == SMC_SERV) {
		/* trigger local delete link processing */
		smc_llc_srv_delete_link_local(to_lnk, del_link_id);
	} else {
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* another llc task is ongoing */
			mutex_unlock(&lgr->llc_conf_mutex);
			wait_event_timeout(lgr->llc_flow_waiter,
				(list_empty(&lgr->list) ||
				 lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
				SMC_LLC_WAIT_TIME);
			mutex_lock(&lgr->llc_conf_mutex);
		}
		if (!list_empty(&lgr->list))
			smc_llc_send_delete_link(to_lnk, del_link_id,
						 SMC_LLC_REQ, true,
						 SMC_LLC_DEL_LOST_PATH);
		wake_up(&lgr->llc_flow_waiter);	/* wake up next waiter */
	}
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_down_cond(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state))
		smcr_link_down(lnk);
}

/* will get the lgr->llc_conf_mutex lock */
void smcr_link_down_cond_sched(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state))
		schedule_work(&lnk->link_down_wrk);
}

void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;
	int i;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN))
			continue; /* lgr is not affected */
		if (list_empty(&lgr->list))
			continue;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *lnk = &lgr->lnk[i];

			if (smc_link_usable(lnk) &&
			    lnk->smcibdev == smcibdev && lnk->ibport == ibport)
				smcr_link_down_cond_sched(lnk);
		}
	}
}

static void smc_link_up_work(struct work_struct *work)
{
	struct smc_ib_up_work *ib_work = container_of(work,
						      struct smc_ib_up_work,
						      work);
	struct smc_link_group *lgr = ib_work->lgr;

	if (list_empty(&lgr->list))
		goto out;
	smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport);
out:
	kfree(ib_work);
}

static void smc_link_down_work(struct work_struct *work)
{
	struct smc_link *link = container_of(work, struct smc_link,
					     link_down_wrk);
	struct smc_link_group *lgr = link->lgr;

	if (list_empty(&lgr->list))
		return;
	wake_up_all(&lgr->llc_msg_waiter);
	mutex_lock(&lgr->llc_conf_mutex);
	smcr_link_down(link);
	mutex_unlock(&lgr->llc_conf_mutex);
}

/* Determine vlan of internal TCP socket. The determined vlan id is
 * stored in ini->vlan_id.
 */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct net_device *ndev;
	int i, nest_lvl, rc = 0;

	ini->vlan_id = 0;
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	ndev = dst->dev;
	if (is_vlan_dev(ndev)) {
		ini->vlan_id = vlan_dev_vlan_id(ndev);
		goto out_rel;
	}

	rtnl_lock();
	nest_lvl = ndev->lower_level;
	for (i = 0; i < nest_lvl; i++) {
		struct list_head *lower = &ndev->adj_list.lower;

		if (list_empty(lower))
			break;
		lower = lower->next;
		ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
		if (is_vlan_dev(ndev)) {
			ini->vlan_id = vlan_dev_vlan_id(ndev);
			break;
		}
	}
	rtnl_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

static bool smcr_lgr_match(struct smc_link_group *lgr,
			   struct smc_clc_msg_local *lcl,
			   enum smc_lgr_role role, u32 clcqpn)
{
	int i;

	if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
	    lgr->role != role)
		return false;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
			continue;
		if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
		    !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
		    !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
			return true;
	}
	return false;
}

static bool smcd_lgr_match(struct smc_link_group *lgr,
			   struct smcd_dev *smcismdev, u64 peer_gid)
{
	return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}

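/* A new connection may only reuse an existing link group if it talks to the
 * same peer (matching system id, GID and MAC and, on the client side, QP
 * number for SMC-R; matching peer GID and ISM device for SMC-D), uses the
 * same VLAN, the group has no sync error, and, on the server side, the group
 * still has room for another connection (SMC_RMBS_PER_LGR_MAX).
 * smc_conn_create() below walks the link group list with exactly these
 * checks.
 */
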
/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_connection *conn = &smc->conn;
	struct list_head *lgr_list;
	struct smc_link_group *lgr;
	enum smc_lgr_role role;
	spinlock_t *lgr_lock;
	int rc = 0;

	lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
	lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
	ini->cln_first_contact = SMC_FIRST_CONTACT;
	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	if (role == SMC_CLNT && ini->srv_first_contact)
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(lgr_lock);
	list_for_each_entry(lgr, lgr_list, list) {
		write_lock_bh(&lgr->conns_lock);
		if ((ini->is_smcd ?
		     smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
		     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
		    !lgr->sync_err &&
		    lgr->vlan_id == ini->vlan_id &&
		    (role == SMC_CLNT ||
		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
			/* link group found */
			ini->cln_first_contact = SMC_REUSE_CONTACT;
			conn->lgr = lgr;
			rc = smc_lgr_register_conn(conn, false);
			write_unlock_bh(&lgr->conns_lock);
			if (!rc && delayed_work_pending(&lgr->free_work))
				cancel_delayed_work(&lgr->free_work);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(lgr_lock);
	if (rc)
		return rc;

	if (role == SMC_CLNT && !ini->srv_first_contact &&
	    ini->cln_first_contact == SMC_FIRST_CONTACT) {
		/* Server reuses a link group, but Client wants to start
		 * a new one
		 * send out_of_sync decline, reason synchr. error
		 */
		return SMC_CLC_DECL_SYNCERR;
	}

create:
	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		rc = smc_lgr_create(smc, ini);
		if (rc)
			goto out;
		lgr = conn->lgr;
		write_lock_bh(&lgr->conns_lock);
		rc = smc_lgr_register_conn(conn, true);
		write_unlock_bh(&lgr->conns_lock);
		if (rc)
			goto out;
	}
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
	conn->urg_state = SMC_URG_READ;
	INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
	if (ini->is_smcd) {
		conn->rx_off = sizeof(struct smcd_cdc_msg);
		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
	}
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif

out:
	return rc;
}

/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size)
{
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	size = (size - 1) >> 14;
	compressed = ilog2(size) + 1;
	if (compressed >= SMC_RMBE_SIZES)
		compressed = SMC_RMBE_SIZES - 1;
	return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
	u32 size;

	size = 0x00000001 << (((int)compressed) + 14);
	return (int)size;
}

/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     struct mutex *lock,
					     struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	mutex_lock(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			mutex_unlock(lock);
			return buf_slot;
		}
	}
	mutex_unlock(lock);
	return NULL;
}

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}

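/* Worked example for the compressed buffer size notation used above
 * (illustrative values only): bucket n stands for a buffer of 2^(n + 14)
 * bytes, so
 *
 *	smc_compress_bufsize(16384) == 0	(16KB, the minimum)
 *	smc_compress_bufsize(40000) == 2	(rounded up to the 64KB bucket)
 *	smc_uncompress_bufsize(2)   == 65536
 *
 * Requests are rounded up so that the application gets at least the
 * sndbuf / rcvbuf size it asked for.
 */
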
/* map an rmb buf to a link */
static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
			     struct smc_link *lnk)
{
	int rc;

	if (buf_desc->is_map_ib[lnk->link_idx])
		return 0;

	rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
	if (rc)
		return rc;
	sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
		   buf_desc->cpu_addr, buf_desc->len);

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk, buf_desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != 1) {
		rc = -EAGAIN;
		goto free_table;
	}

	/* create a new memory region for the RMB */
	if (is_rmb) {
		rc = smc_ib_get_memory_region(lnk->roce_pd,
					      IB_ACCESS_REMOTE_WRITE |
					      IB_ACCESS_LOCAL_WRITE,
					      buf_desc, lnk->link_idx);
		if (rc)
			goto buf_unmap;
		smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
	}
	buf_desc->is_map_ib[lnk->link_idx] = true;
	return 0;

buf_unmap:
	smc_ib_buf_unmap_sg(lnk, buf_desc,
			    is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
free_table:
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	return rc;
}

/* register a new rmb on IB device,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
{
	if (list_empty(&link->lgr->list))
		return -ENOLINK;
	if (!rmb_desc->is_reg_mr[link->link_idx]) {
		/* register memory region for new rmb */
		if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
			rmb_desc->is_reg_err = true;
			return -EFAULT;
		}
		rmb_desc->is_reg_mr[link->link_idx] = true;
	}
	return 0;
}

static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
			     struct list_head *lst, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf;
	int rc = 0;

	mutex_lock(lock);
	list_for_each_entry_safe(buf_desc, bf, lst, list) {
		if (!buf_desc->used)
			continue;
		rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
		if (rc)
			goto out;
	}
out:
	mutex_unlock(lock);
	return rc;
}

/* map all used buffers of lgr for a new link */
int smcr_buf_map_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i, rc = 0;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
				       &lgr->rmbs[i], true);
		if (rc)
			return rc;
		rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
				       &lgr->sndbufs[i], false);
		if (rc)
			return rc;
	}
	return 0;
}

/* register all used buffers of lgr for a new link,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_buf_reg_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i, rc = 0;

	mutex_lock(&lgr->rmbs_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
			if (!buf_desc->used)
				continue;
			rc = smcr_link_reg_rmb(lnk, buf_desc);
			if (rc)
				goto out;
		}
	}
out:
	mutex_unlock(&lgr->rmbs_lock);
	return rc;
}

static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
						bool is_rmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	buf_desc->order = get_order(bufsize);
	buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
				      __GFP_NOMEMALLOC | __GFP_COMP |
				      __GFP_NORETRY | __GFP_ZERO,
				      buf_desc->order);
	if (!buf_desc->pages) {
		kfree(buf_desc);
		return ERR_PTR(-EAGAIN);
	}
	buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
	buf_desc->len = bufsize;
	return buf_desc;
}

/* map buf_desc on all usable links,
 * unused buffers stay mapped as long as the link is up
 */
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
				     struct smc_buf_desc *buf_desc, bool is_rmb)
{
	int i, rc = 0;

	/* protect against parallel link reconfiguration */
	mutex_lock(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (!smc_link_usable(lnk))
			continue;
		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
			rc = -ENOMEM;
			goto out;
		}
	}
out:
	mutex_unlock(&lgr->llc_conf_mutex);
	return rc;
}

#define SMCD_DMBE_SIZES		7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
						bool is_dmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	int rc;

	if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
		return ERR_PTR(-EAGAIN);

	/* try to alloc a new DMB */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);
	if (is_dmb) {
		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
		if (rc) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
		/* CDC header stored in buf. So, pretend it was smaller */
		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
	} else {
		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
					     __GFP_NOWARN | __GFP_NORETRY |
					     __GFP_NOMEMALLOC);
		if (!buf_desc->cpu_addr) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->len = bufsize;
	}
	return buf_desc;
}

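/* For SMC-D the CDC header lives at the start of the DMB itself, which is why
 * smcd_new_buf_create() above reports a usable length of
 * bufsize - sizeof(struct smcd_cdc_msg), why smc_conn_create() sets
 * conn->rx_off to that header size, and why smcd_buf_free() adds the header
 * size back before unregistering the DMB.
 */
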
static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_short;
	struct mutex *lock;	/* lock buffer list */
	int sk_buf_size;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_sndbuf / 2;

	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
	     bufsize_short >= 0; bufsize_short--) {

		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_short];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_short];
		}
		bufsize = smc_uncompress_bufsize(bufsize_short);
		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
			continue;

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
		if (buf_desc) {
			memset(buf_desc->cpu_addr, 0, bufsize);
			break; /* found reusable slot */
		}

		if (is_smcd)
			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
		else
			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc))
			continue;

		buf_desc->used = 1;
		mutex_lock(lock);
		list_add(&buf_desc->list, buf_list);
		mutex_unlock(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return -ENOMEM;

	if (!is_smcd) {
		if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
			smcr_buf_unuse(buf_desc, lgr);
			return -ENOMEM;
		}
	}

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_short = bufsize_short;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit =
			smc_rmb_wnd_update_limit(buf_desc->len);
		if (is_smcd)
			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}

void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
		return;
	smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
		return;
	smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
				       DMA_FROM_DEVICE);
	}
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
					  DMA_FROM_DEVICE);
	}
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
	int rc;

	/* create send buffer */
	rc = __smc_buf_create(smc, is_smcd, false);
	if (rc)
		return rc;
	/* create rmb */
	rc = __smc_buf_create(smc, is_smcd, true);
	if (rc)
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
	return rc;
}

static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}

static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
				   u32 rkey)
{
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (test_bit(i, lgr->rtokens_used_mask) &&
		    lgr->rtokens[i][lnk_idx].rkey == rkey)
			return i;
	}
	return -ENOENT;
}

/* set rtoken for a new link to an existing rmb */
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
{
	int rtok_idx;

	rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
	if (rtok_idx == -ENOENT)
		return;
	lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
	lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
}

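/* The rtoken bookkeeping above and below uses a two-dimensional array:
 * lgr->rtokens[rtoken index][link index] holds the peer rkey and DMA address
 * of one remote RMB as seen through one link, rtokens_used_mask tracks which
 * rtoken indices are allocated, and an rkey of zero means that no rtoken is
 * stored for that link.
 */
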
/* set rtoken for a new link whose link_id is given */
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
		     __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	bool found = false;
	int link_idx;

	for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
		if (lgr->lnk[link_idx].link_id == link_id) {
			found = true;
			break;
		}
	}
	if (!found)
		return;
	lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
	lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* already in list */
			return i;
		}
	}
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][lnk->link_idx].rkey = rkey;
	lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
	return i;
}

/* delete an rtoken from all links */
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u32 rkey = ntohl(nw_rkey);
	int i, j;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
				lgr->rtokens[i][j].rkey = 0;
				lgr->rtokens[i][j].dma_addr = 0;
			}
			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_link *lnk,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
					  clc->rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}

static void smc_core_going_away(void)
{
	struct smc_ib_device *smcibdev;
	struct smcd_dev *smcd;

	mutex_lock(&smc_ib_devices.mutex);
	list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
		int i;

		for (i = 0; i < SMC_MAX_PORTS; i++)
			set_bit(i, smcibdev->ports_going_away);
	}
	mutex_unlock(&smc_ib_devices.mutex);

	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		smcd->going_away = 1;
	}
	mutex_unlock(&smcd_dev_list.mutex);
}

/* Clean up all SMC link groups */
static void smc_lgrs_shutdown(void)
{
	struct smcd_dev *smcd;

	smc_core_going_away();

	smc_smcr_terminate_all(NULL);

	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list)
		smc_smcd_terminate_all(smcd);
	mutex_unlock(&smcd_dev_list.mutex);
}

static int smc_core_reboot_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	smc_lgrs_shutdown();
	smc_ib_unregister_client();
	return 0;
}

static struct notifier_block smc_reboot_notifier = {
	.notifier_call = smc_core_reboot_event,
};

int __init smc_core_init(void)
{
	return register_reboot_notifier(&smc_reboot_notifier);
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
	unregister_reboot_notifier(&smc_reboot_notifier);
	smc_lgrs_shutdown();
}