// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Basic Transport Functions exploiting Infiniband API
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"

#define SMC_LGR_NUM_INCR	256
#define SMC_LGR_FREE_DELAY_SERV	(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT	(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
#define SMC_LGR_FREE_DELAY_FAST	(8 * HZ)

static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.num = 0,
};

static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);

struct smc_ib_up_work {
	struct work_struct	work;
	struct smc_link_group	*lgr;
	struct smc_ib_device	*smcibdev;
	u8			ibport;
};

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);

static void smc_link_up_work(struct work_struct *work);
static void smc_link_down_work(struct work_struct *work);

/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
						  spinlock_t **lgr_lock)
{
	if (lgr->is_smcd) {
		*lgr_lock = &lgr->smcd->lgr_lock;
		return &lgr->smcd->lgr_list;
	}

	*lgr_lock = &smc_lgr_list.lock;
	return &smc_lgr_list.list;
}

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	if (!lgr->freeing && !lgr->freefast) {
		mod_delayed_work(system_wq, &lgr->free_work,
				 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
						SMC_LGR_FREE_DELAY_CLNT :
						SMC_LGR_FREE_DELAY_SERV);
	}
}

void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
	if (!lgr->freeing && !lgr->freefast) {
		lgr->freefast = 1;
		mod_delayed_work(system_wq, &lgr->free_work,
				 SMC_LGR_FREE_DELAY_FAST);
	}
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @smc		connection to register
 * Returns 0 on success, != 0 otherwise.
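 * The conns_all rbtree is ordered by the connection's alert_token_local.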
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}

/* assign an SMC-R link to the connection */
static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
{
	enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
				       SMC_LNK_ACTIVE;
	int i, j;

	/* do link balancing */
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &conn->lgr->lnk[i];

		if (lnk->state != expected || lnk->link_is_asym)
			continue;
		if (conn->lgr->role == SMC_CLNT) {
			conn->lnk = lnk; /* temporary, SMC server assigns link*/
			break;
		}
		if (conn->lgr->conns_num % 2) {
			for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
				struct smc_link *lnk2;

				lnk2 = &conn->lgr->lnk[j];
				if (lnk2->state == expected &&
				    !lnk2->link_is_asym) {
					conn->lnk = lnk2;
					break;
				}
			}
		}
		if (!conn->lnk)
			conn->lnk = lnk;
		break;
	}
	if (!conn->lnk)
		return SMC_CLC_DECL_NOACTLINK;
	return 0;
}

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);
	int rc;

	if (!conn->lgr->is_smcd) {
		rc = smcr_lgr_conn_assign_link(conn, first);
		if (rc)
			return rc;
	}
	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);
	conn->lgr->conns_num++;
	return 0;
}

/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	lgr->conns_num--;
	conn->alert_token_local = 0;
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local) {
		__smc_lgr_unregister_conn(conn);
	}
	write_unlock_bh(&lgr->conns_lock);
	conn->lgr = NULL;
}

void smc_lgr_cleanup_early(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *lgr_list;
	spinlock_t *lgr_lock;

	if (!lgr)
		return;

	smc_conn_free(conn);
	lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	/* do not use this link group for new connections */
	if (!list_empty(lgr_list))
		list_del_init(lgr_list);
	spin_unlock_bh(lgr_lock);
	smc_lgr_schedule_free_work_fast(lgr);
}

static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (smc_link_usable(lnk))
			lnk->state = SMC_LNK_INACTIVE;
	}
	wake_up_interruptible_all(&lgr->llc_waiter);
}

static void smc_lgr_free(struct smc_link_group *lgr);

static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	spinlock_t *lgr_lock;
	bool conns;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;
	}
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(lgr_lock);
		return;
	}
	list_del_init(&lgr->list); /* remove from smc_lgr_list */
	lgr->freeing = 1; /* this instance does the freeing, no new schedule */
	spin_unlock_bh(lgr_lock);
	cancel_delayed_work(&lgr->free_work);

	if (!lgr->is_smcd && !lgr->terminating)
		smc_llc_send_link_delete_all(lgr, true,
					     SMC_LLC_DEL_PROG_INIT_TERM);
	if (lgr->is_smcd && !lgr->terminating)
		smc_ism_signal_shutdown(lgr);
	if (!lgr->is_smcd)
		smcr_lgr_link_deactivate_all(lgr);
	smc_lgr_free(lgr);
}

static void smc_lgr_terminate_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  terminate_work);

	__smc_lgr_terminate(lgr, true);
}

/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
	u8 link_id;
	int i;

	while (1) {
		link_id = ++lgr->next_link_id;
		if (!link_id)	/* skip zero as link_id */
			link_id = ++lgr->next_link_id;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (smc_link_usable(&lgr->lnk[i]) &&
			    lgr->lnk[i].link_id == link_id)
				continue;
		}
		break;
	}
	return link_id;
}

int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
		   u8 link_idx, struct smc_init_info *ini)
{
	u8 rndvec[3];
	int rc;

	get_device(&ini->ib_dev->ibdev->dev);
	atomic_inc(&ini->ib_dev->lnk_cnt);
	lnk->state = SMC_LNK_ACTIVATING;
	lnk->link_id = smcr_next_link_id(lgr);
	lnk->lgr = lgr;
	lnk->link_idx = link_idx;
	lnk->smcibdev = ini->ib_dev;
	lnk->ibport = ini->ib_port;
	lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
	smc_llc_link_set_uid(lnk);
	INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
	if (!ini->ib_dev->initialized) {
		rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
		if (rc)
			goto out;
	}
	get_random_bytes(rndvec, sizeof(rndvec));
	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
		(rndvec[2] << 16);
	rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
				  ini->vlan_id, lnk->gid, &lnk->sgid_index);
	if (rc)
		goto out;
	rc = smc_llc_link_init(lnk);
	if (rc)
		goto out;
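	/* create the IB resources for this link: work request buffers,
	 * protection domain and queue pair; each error path below unwinds
	 * the steps already completed, in reverse order
	 */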
	rc = smc_wr_alloc_link_mem(lnk);
	if (rc)
		goto clear_llc_lnk;
	rc = smc_ib_create_protection_domain(lnk);
	if (rc)
		goto free_link_mem;
	rc = smc_ib_create_queue_pair(lnk);
	if (rc)
		goto dealloc_pd;
	rc = smc_wr_create_link(lnk);
	if (rc)
		goto destroy_qp;
	return 0;

destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
clear_llc_lnk:
	smc_llc_link_clear(lnk, false);
out:
	put_device(&ini->ib_dev->ibdev->dev);
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
		wake_up(&ini->ib_dev->lnks_deleted);
	return rc;
}

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_link_group *lgr;
	struct list_head *lgr_list;
	struct smc_link *lnk;
	spinlock_t *lgr_lock;
	u8 link_idx;
	int rc = 0;
	int i;

	if (ini->is_smcd && ini->vlan_id) {
		if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
			rc = SMC_CLC_DECL_ISMVLANERR;
			goto out;
		}
	}

	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
		rc = SMC_CLC_DECL_MEM;
		goto ism_put_vlan;
	}
	lgr->is_smcd = ini->is_smcd;
	lgr->sync_err = 0;
	lgr->terminating = 0;
	lgr->freefast = 0;
	lgr->freeing = 0;
	lgr->vlan_id = ini->vlan_id;
	mutex_init(&lgr->sndbufs_lock);
	mutex_init(&lgr->rmbs_lock);
	rwlock_init(&lgr->conns_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
	lgr->next_link_id = 0;
	smc_lgr_list.num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
	lgr->conns_all = RB_ROOT;
	if (ini->is_smcd) {
		/* SMC-D specific settings */
		get_device(&ini->ism_dev->dev);
		lgr->peer_gid = ini->ism_gid;
		lgr->smcd = ini->ism_dev;
		lgr_list = &ini->ism_dev->lgr_list;
		lgr_lock = &lgr->smcd->lgr_lock;
		lgr->peer_shutdown = 0;
		atomic_inc(&ini->ism_dev->lgr_cnt);
	} else {
		/* SMC-R specific settings */
		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
		       SMC_SYSTEMID_LEN);
		memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
		       SMC_MAX_PNETID_LEN);
		smc_llc_lgr_init(lgr, smc);

		link_idx = SMC_SINGLE_LINK;
		lnk = &lgr->lnk[link_idx];
		rc = smcr_link_init(lgr, lnk, link_idx, ini);
		if (rc)
			goto free_lgr;
		lgr_list = &smc_lgr_list.list;
		lgr_lock = &smc_lgr_list.lock;
		atomic_inc(&lgr_cnt);
	}
	smc->conn.lgr = lgr;
	spin_lock_bh(lgr_lock);
	list_add(&lgr->list, lgr_list);
	spin_unlock_bh(lgr_lock);
	return 0;

free_lgr:
	kfree(lgr);
ism_put_vlan:
	if (ini->is_smcd && ini->vlan_id)
		smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
out:
	if (rc < 0) {
		if (rc == -ENOMEM)
			rc = SMC_CLC_DECL_MEM;
		else
			rc = SMC_CLC_DECL_INTERR;
	}
	return rc;
}

static int smc_write_space(struct smc_connection *conn)
{
	int buffer_len = conn->peer_rmbe_size;
	union smc_host_cursor prod;
	union smc_host_cursor cons;
	int space;

	smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
	/* determine rx_buf space */
	space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
	return space;
}

static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
			     struct smc_wr_buf *wr_buf)
{
	struct smc_connection *conn = &smc->conn;
	union smc_host_cursor cons, fin;
	int rc = 0;
	int diff;

	smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
	smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
	/* set prod cursor to old state, enforce tx_rdma_writes() */
	smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);

	if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
		/* cons cursor advanced more than fin, and prod was set
		 * to fin above, so now prod is smaller than cons. Fix that.
		 */
		diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_sent, diff);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_fin, diff);

		smp_mb__before_atomic();
		atomic_add(diff, &conn->sndbuf_space);
		smp_mb__after_atomic();

		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl.prod, diff);
		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl_fin, diff);
	}
	/* recalculate, value is used by tx_rdma_writes() */
	atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));

	if (smc->sk.sk_state != SMC_INIT &&
	    smc->sk.sk_state != SMC_CLOSED) {
		rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
		if (!rc) {
			schedule_delayed_work(&conn->tx_work, 0);
			smc->sk.sk_data_ready(&smc->sk);
		}
	} else {
		smc_wr_tx_put_slot(conn->lnk,
				   (struct smc_wr_tx_pend_priv *)pend);
	}
	return rc;
}

struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
				  struct smc_link *from_lnk, bool is_dev_err)
{
	struct smc_link *to_lnk = NULL;
	struct smc_cdc_tx_pend *pend;
	struct smc_connection *conn;
	struct smc_wr_buf *wr_buf;
	struct smc_sock *smc;
	struct rb_node *node;
	int i, rc = 0;

	/* link is inactive, wake up tx waiters */
	smc_wr_wakeup_tx_wait(from_lnk);

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].state != SMC_LNK_ACTIVE ||
		    i == from_lnk->link_idx)
			continue;
		if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
		    from_lnk->ibport == lgr->lnk[i].ibport) {
			continue;
		}
		to_lnk = &lgr->lnk[i];
		break;
	}
	if (!to_lnk) {
		smc_lgr_terminate_sched(lgr);
		return NULL;
	}
again:
	read_lock_bh(&lgr->conns_lock);
	for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
		conn = rb_entry(node, struct smc_connection, alert_node);
		if (conn->lnk != from_lnk)
			continue;
		smc = container_of(conn, struct smc_sock, conn);
		/* conn->lnk not yet set in SMC_INIT state */
		if (smc->sk.sk_state == SMC_INIT)
			continue;
		if (smc->sk.sk_state == SMC_CLOSED ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_PEERABORTWAIT ||
		    smc->sk.sk_state == SMC_PROCESSABORT) {
			spin_lock_bh(&conn->send_lock);
			conn->lnk = to_lnk;
			spin_unlock_bh(&conn->send_lock);
			continue;
		}
		sock_hold(&smc->sk);
		read_unlock_bh(&lgr->conns_lock);
		/* pre-fetch buffer outside of send_lock, might sleep */
		rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
		if (rc) {
			smcr_link_down_cond_sched(to_lnk);
			return NULL;
		}
		/* avoid race with smcr_tx_sndbuf_nonempty() */
		spin_lock_bh(&conn->send_lock);
		conn->lnk = to_lnk;
		rc = smc_switch_cursor(smc, pend, wr_buf);
		spin_unlock_bh(&conn->send_lock);
		sock_put(&smc->sk);
		if (rc) {
			smcr_link_down_cond_sched(to_lnk);
			return NULL;
		}
		goto again;
	}
	read_unlock_bh(&lgr->conns_lock);
	return to_lnk;
}

static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
			   struct smc_link_group *lgr)
{
	int rc;

	if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
		/* unregister rmb with peer */
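		/* the LLC RKEY flow keeps this DELETE_RKEY from running
		 * concurrently with an rkey exchange that is in flight
		 */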
		rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
		if (!rc) {
			/* protect against smc_llc_cli_rkey_exchange() */
			mutex_lock(&lgr->llc_conf_mutex);
			smc_llc_do_delete_rkey(lgr, rmb_desc);
			rmb_desc->is_conf_rkey = false;
			mutex_unlock(&lgr->llc_conf_mutex);
			smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
		}
	}

	if (rmb_desc->is_reg_err) {
		/* buf registration failed, reuse not possible */
		mutex_lock(&lgr->rmbs_lock);
		list_del(&rmb_desc->list);
		mutex_unlock(&lgr->rmbs_lock);

		smc_buf_free(lgr, true, rmb_desc);
	} else {
		rmb_desc->used = 0;
	}
}

static void smc_buf_unuse(struct smc_connection *conn,
			  struct smc_link_group *lgr)
{
	if (conn->sndbuf_desc)
		conn->sndbuf_desc->used = 0;
	if (conn->rmb_desc && lgr->is_smcd)
		conn->rmb_desc->used = 0;
	else if (conn->rmb_desc)
		smcr_buf_unuse(conn->rmb_desc, lgr);
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	if (lgr->is_smcd) {
		if (!list_empty(&lgr->list))
			smc_ism_unset_conn(conn);
		tasklet_kill(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
		if (current_work() != &conn->abort_work)
			cancel_work_sync(&conn->abort_work);
	}
	if (!list_empty(&lgr->list)) {
		smc_lgr_unregister_conn(conn);
		smc_buf_unuse(conn, lgr); /* allow buffer reuse */
	}

	if (!lgr->conns_num)
		smc_lgr_schedule_free_work(lgr);
}

/* unregister a link from a buf_desc */
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
				struct smc_link *lnk)
{
	if (is_rmb)
		buf_desc->is_reg_mr[lnk->link_idx] = false;
	if (!buf_desc->is_map_ib[lnk->link_idx])
		return;
	if (is_rmb) {
		if (buf_desc->mr_rx[lnk->link_idx]) {
			smc_ib_put_memory_region(
					buf_desc->mr_rx[lnk->link_idx]);
			buf_desc->mr_rx[lnk->link_idx] = NULL;
		}
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
	} else {
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
	}
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	buf_desc->is_map_ib[lnk->link_idx] = false;
}

/* unmap all buffers of lgr for a deleted link */
static void smcr_buf_unmap_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		mutex_lock(&lgr->rmbs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
			smcr_buf_unmap_link(buf_desc, true, lnk);
		mutex_unlock(&lgr->rmbs_lock);
		mutex_lock(&lgr->sndbufs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
					 list)
			smcr_buf_unmap_link(buf_desc, false, lnk);
		mutex_unlock(&lgr->sndbufs_lock);
	}
}

static void smcr_rtoken_clear_link(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		lgr->rtokens[i][lnk->link_idx].rkey = 0;
		lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
	}
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_clear(struct smc_link *lnk, bool log)
{
	struct smc_ib_device *smcibdev;

	if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
		return;
	lnk->peer_qpn = 0;
	smc_llc_link_clear(lnk, log);
	smcr_buf_unmap_lgr(lnk);
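	/* forget the peer rtokens known on this link, then release its IB
	 * resources roughly in reverse order of smcr_link_init()
	 */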
	smcr_rtoken_clear_link(lnk);
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
	put_device(&lnk->smcibdev->ibdev->dev);
	smcibdev = lnk->smcibdev;
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&smcibdev->lnk_cnt))
		wake_up(&smcibdev->lnks_deleted);
}

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
			  struct smc_buf_desc *buf_desc)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);

	if (buf_desc->pages)
		__free_pages(buf_desc->pages, buf_desc->order);
	kfree(buf_desc);
}

static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
			  struct smc_buf_desc *buf_desc)
{
	if (is_dmb) {
		/* restore original buf len */
		buf_desc->len += sizeof(struct smcd_cdc_msg);
		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
	} else {
		kfree(buf_desc->cpu_addr);
	}
	kfree(buf_desc);
}

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc)
{
	if (lgr->is_smcd)
		smcd_buf_free(lgr, is_rmb, buf_desc);
	else
		smcr_buf_free(lgr, is_rmb, buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf_desc;
	struct list_head *buf_list;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		if (is_rmb)
			buf_list = &lgr->rmbs[i];
		else
			buf_list = &lgr->sndbufs[i];
		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
					 list) {
			list_del(&buf_desc->list);
			smc_buf_free(lgr, is_rmb, buf_desc);
		}
	}
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	/* free send buffers */
	__smc_lgr_free_bufs(lgr, false);
	/* free rmbs */
	__smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
	int i;

	if (!lgr->is_smcd) {
		mutex_lock(&lgr->llc_conf_mutex);
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
				smcr_link_clear(&lgr->lnk[i], false);
		}
		mutex_unlock(&lgr->llc_conf_mutex);
		smc_llc_lgr_clear(lgr);
	}

	smc_lgr_free_bufs(lgr);
	if (lgr->is_smcd) {
		if (!lgr->terminating) {
			smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
			put_device(&lgr->smcd->dev);
		}
		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
			wake_up(&lgr->smcd->lgrs_deleted);
	} else {
		if (!atomic_dec_return(&lgr_cnt))
			wake_up(&lgrs_deleted);
	}
	kfree(lgr);
}

static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		struct smc_buf_desc *buf_desc;

		list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
			buf_desc->len += sizeof(struct smcd_cdc_msg);
			smc_ism_unregister_dmb(lgr->smcd, buf_desc);
		}
	}
}

static void smc_sk_wake_ups(struct smc_sock *smc)
{
	smc->sk.sk_write_space(&smc->sk);
	smc->sk.sk_data_ready(&smc->sk);
	smc->sk.sk_state_change(&smc->sk);
}

/* kill a connection */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		smc_close_abort(conn);
	conn->killed = 1;
	smc->sk.sk_err = ECONNABORTED;
	smc_sk_wake_ups(smc);
	if (conn->lgr->is_smcd) {
		smc_ism_unset_conn(conn);
		if (soft)
			tasklet_kill(&conn->rx_tsklet);
		else
			tasklet_unlock_wait(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
	}
	smc_lgr_unregister_conn(conn);
	smc_close_active_abort(smc);
}

static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
	if (lgr->is_smcd) {
		smc_ism_signal_shutdown(lgr);
		smcd_unregister_all_dmbs(lgr);
		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
		put_device(&lgr->smcd->dev);
	} else {
		u32 rsn = lgr->llc_termination_rsn;

		if (!rsn)
			rsn = SMC_LLC_DEL_PROG_INIT_TERM;
		smc_llc_send_link_delete_all(lgr, false, rsn);
		smcr_lgr_link_deactivate_all(lgr);
	}
}

/* terminate link group
 * @soft: true if link group shutdown can take its time
 *	  false if immediate link group shutdown is required
 */
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	if (lgr->terminating)
		return;	/* lgr already terminating */
	/* cancel free_work sync, will terminate when lgr->freeing is set */
	cancel_delayed_work_sync(&lgr->free_work);
	lgr->terminating = 1;

	/* kill remaining link group connections */
	read_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		read_unlock_bh(&lgr->conns_lock);
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put below */
		lock_sock(&smc->sk);
		smc_conn_kill(conn, soft);
		release_sock(&smc->sk);
		sock_put(&smc->sk); /* sock_hold above */
		read_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	read_unlock_bh(&lgr->conns_lock);
	smc_lgr_cleanup(lgr);
	smc_lgr_free(lgr);
}

/* unlink link group and schedule termination */
void smc_lgr_terminate_sched(struct smc_link_group *lgr)
{
	spinlock_t *lgr_lock;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;	/* lgr already terminating */
	}
	list_del_init(&lgr->list);
	lgr->freeing = 1;
	spin_unlock_bh(lgr_lock);
	schedule_work(&lgr->terminate_work);
}

/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);

	/* run common cleanup function and build free list */
	spin_lock_bh(&dev->lgr_lock);
	list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
		if ((!peer_gid || lgr->peer_gid == peer_gid) &&
		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
			if (peer_gid) /* peer triggered termination */
				lgr->peer_shutdown = 1;
			list_move(&lgr->list, &lgr_free_list);
			lgr->freeing = 1;
		}
	}
	spin_unlock_bh(&dev->lgr_lock);

	/* cancel the regular free workers and actually free lgrs */
	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		schedule_work(&lgr->terminate_work);
	}
}

/* Called when an SMCD device is removed or the smc module is unloaded */
void smc_smcd_terminate_all(struct smcd_dev *smcd)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);

	spin_lock_bh(&smcd->lgr_lock);
	list_splice_init(&smcd->lgr_list, &lgr_free_list);
	list_for_each_entry(lgr, &lgr_free_list, list)
		lgr->freeing = 1;
	spin_unlock_bh(&smcd->lgr_lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		__smc_lgr_terminate(lgr, false);
	}

	if (atomic_read(&smcd->lgr_cnt))
		wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
}

/* Called when an SMCR device is removed or the smc module is unloaded.
 * If smcibdev is given, all SMCR link groups using this device are terminated.
 * If smcibdev is NULL, all SMCR link groups are terminated.
 */
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);
	int i;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!smcibdev) {
		list_splice_init(&smc_lgr_list.list, &lgr_free_list);
		list_for_each_entry(lgr, &lgr_free_list, list)
			lgr->freeing = 1;
	} else {
		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
				if (lgr->lnk[i].smcibdev == smcibdev)
					smcr_link_down_cond_sched(&lgr->lnk[i]);
			}
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
		__smc_lgr_terminate(lgr, false);
	}

	if (smcibdev) {
		if (atomic_read(&smcibdev->lnk_cnt))
			wait_event(smcibdev->lnks_deleted,
				   !atomic_read(&smcibdev->lnk_cnt));
	} else {
		if (atomic_read(&lgr_cnt))
			wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
	}
}

/* set new lgr type and clear all asymmetric link tagging */
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
{
	char *lgr_type = "";
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		if (smc_link_usable(&lgr->lnk[i]))
			lgr->lnk[i].link_is_asym = false;
	if (lgr->type == new_type)
		return;
	lgr->type = new_type;

	switch (lgr->type) {
	case SMC_LGR_NONE:
		lgr_type = "NONE";
		break;
	case SMC_LGR_SINGLE:
		lgr_type = "SINGLE";
		break;
	case SMC_LGR_SYMMETRIC:
		lgr_type = "SYMMETRIC";
		break;
	case SMC_LGR_ASYMMETRIC_PEER:
		lgr_type = "ASYMMETRIC_PEER";
		break;
	case SMC_LGR_ASYMMETRIC_LOCAL:
		lgr_type = "ASYMMETRIC_LOCAL";
		break;
	}
	pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
			    "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
			    lgr_type, lgr->pnet_id);
}

/* set new lgr type and tag a link as asymmetric */
void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
			    enum smc_lgr_type new_type, int asym_lnk_idx)
{
	smcr_lgr_set_type(lgr, new_type);
	lgr->lnk[asym_lnk_idx].link_is_asym = true;
}

/* abort connection, abort_work scheduled from tasklet context */
static void smc_conn_abort_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   abort_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	smc_conn_kill(conn, true);
	sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}

/* link is up - establish alternate link if applicable */
static void smcr_link_up(struct smc_link_group *lgr,
			 struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link *link = NULL;

	if (list_empty(&lgr->list) ||
	    lgr->type == SMC_LGR_SYMMETRIC ||
	    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
		return;

	if (lgr->role == SMC_SERV) {
		/* trigger local add link processing */
		link = smc_llc_usable_link(lgr);
		if (!link)
			return;
		smc_llc_srv_add_link_local(link);
	} else {
		/* invite server to start add link processing */
		u8 gid[SMC_GID_SIZE];

		if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
					 NULL))
			return;
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* some other llc task is ongoing */
			wait_event_interruptible_timeout(lgr->llc_waiter,
				(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
				SMC_LLC_WAIT_TIME);
		}
		if (list_empty(&lgr->list) ||
		    !smc_ib_port_active(smcibdev, ibport))
			return; /* lgr or device no longer active */
		link = smc_llc_usable_link(lgr);
		if (!link)
			return;
		smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
				      NULL, SMC_LLC_REQ);
	}
}

void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_ib_up_work *ib_work;
	struct smc_link_group *lgr, *n;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN) ||
		    lgr->type == SMC_LGR_SYMMETRIC ||
		    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
			continue;
		ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
		if (!ib_work)
			continue;
		INIT_WORK(&ib_work->work, smc_link_up_work);
		ib_work->lgr = lgr;
		ib_work->smcibdev = smcibdev;
		ib_work->ibport = ibport;
		schedule_work(&ib_work->work);
	}
}

/* link is down - switch connections to alternate link,
 * must be called under lgr->llc_conf_mutex lock
 */
static void smcr_link_down(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_link *to_lnk;
	int del_link_id;

	if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
		return;

	smc_ib_modify_qp_reset(lnk);
	to_lnk = smc_switch_conns(lgr, lnk, true);
	if (!to_lnk) { /* no backup link available */
		smcr_link_clear(lnk, true);
		return;
	}
	smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
	del_link_id = lnk->link_id;

	if (lgr->role == SMC_SERV) {
		/* trigger local delete link processing */
		smc_llc_srv_delete_link_local(to_lnk, del_link_id);
	} else {
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* another llc task is ongoing */
			mutex_unlock(&lgr->llc_conf_mutex);
			wait_event_interruptible_timeout(lgr->llc_waiter,
				(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
				SMC_LLC_WAIT_TIME);
			mutex_lock(&lgr->llc_conf_mutex);
		}
		smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
					 SMC_LLC_DEL_LOST_PATH);
	}
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_down_cond(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state))
		smcr_link_down(lnk);
}

/* will get the lgr->llc_conf_mutex lock */
void smcr_link_down_cond_sched(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state))
		schedule_work(&lnk->link_down_wrk);
}

void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;
	int i;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN))
			continue; /* lgr is not affected */
		if (list_empty(&lgr->list))
			continue;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *lnk = &lgr->lnk[i];

			if (smc_link_usable(lnk) &&
			    lnk->smcibdev == smcibdev && lnk->ibport == ibport)
				smcr_link_down_cond_sched(lnk);
		}
	}
}

static void smc_link_up_work(struct work_struct *work)
{
	struct smc_ib_up_work *ib_work = container_of(work,
						      struct smc_ib_up_work,
						      work);
	struct smc_link_group *lgr = ib_work->lgr;

	if (list_empty(&lgr->list))
		goto out;
	smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport);
out:
	kfree(ib_work);
}

static void smc_link_down_work(struct work_struct *work)
{
	struct smc_link *link = container_of(work, struct smc_link,
					     link_down_wrk);
	struct smc_link_group *lgr = link->lgr;

	if (list_empty(&lgr->list))
		return;
	wake_up_interruptible_all(&lgr->llc_waiter);
	mutex_lock(&lgr->llc_conf_mutex);
	smcr_link_down(link);
	mutex_unlock(&lgr->llc_conf_mutex);
}

/* Determine vlan of internal TCP socket.
 * @vlan_id: address to store the determined vlan id into
 */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct net_device *ndev;
	int i, nest_lvl, rc = 0;

	ini->vlan_id = 0;
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	ndev = dst->dev;
	if (is_vlan_dev(ndev)) {
		ini->vlan_id = vlan_dev_vlan_id(ndev);
		goto out_rel;
	}

	rtnl_lock();
	nest_lvl = ndev->lower_level;
	for (i = 0; i < nest_lvl; i++) {
		struct list_head *lower = &ndev->adj_list.lower;

		if (list_empty(lower))
			break;
		lower = lower->next;
		ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
		if (is_vlan_dev(ndev)) {
			ini->vlan_id = vlan_dev_vlan_id(ndev);
			break;
		}
	}
	rtnl_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

static bool smcr_lgr_match(struct smc_link_group *lgr,
			   struct smc_clc_msg_local *lcl,
			   enum smc_lgr_role role, u32 clcqpn)
{
	int i;

	if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
	    lgr->role != role)
		return false;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
			continue;
		if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
		    !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
		    !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
			return true;
	}
	return false;
}

static bool smcd_lgr_match(struct smc_link_group *lgr,
			   struct smcd_dev *smcismdev, u64 peer_gid)
{
	return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_connection *conn = &smc->conn;
	struct list_head *lgr_list;
	struct smc_link_group *lgr;
	enum smc_lgr_role role;
	spinlock_t *lgr_lock;
	int rc = 0;

	lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
	lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
	ini->cln_first_contact = SMC_FIRST_CONTACT;
	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	if (role == SMC_CLNT && ini->srv_first_contact)
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(lgr_lock);
	list_for_each_entry(lgr, lgr_list, list) {
		write_lock_bh(&lgr->conns_lock);
		if ((ini->is_smcd ?
		     smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
		     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
		    !lgr->sync_err &&
		    lgr->vlan_id == ini->vlan_id &&
		    (role == SMC_CLNT ||
		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
			/* link group found */
			ini->cln_first_contact = SMC_REUSE_CONTACT;
			conn->lgr = lgr;
			rc = smc_lgr_register_conn(conn, false);
			write_unlock_bh(&lgr->conns_lock);
			if (!rc && delayed_work_pending(&lgr->free_work))
				cancel_delayed_work(&lgr->free_work);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(lgr_lock);
	if (rc)
		return rc;

	if (role == SMC_CLNT && !ini->srv_first_contact &&
	    ini->cln_first_contact == SMC_FIRST_CONTACT) {
		/* Server reuses a link group, but Client wants to start
		 * a new one
		 * send out_of_sync decline, reason synchr. error
		 */
		return SMC_CLC_DECL_SYNCERR;
	}

create:
	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		rc = smc_lgr_create(smc, ini);
		if (rc)
			goto out;
		lgr = conn->lgr;
		write_lock_bh(&lgr->conns_lock);
		rc = smc_lgr_register_conn(conn, true);
		write_unlock_bh(&lgr->conns_lock);
		if (rc)
			goto out;
	}
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
	conn->urg_state = SMC_URG_READ;
	INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
	if (ini->is_smcd) {
		conn->rx_off = sizeof(struct smcd_cdc_msg);
		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
	}
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif

out:
	return rc;
}

/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
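 * e.g. a request of 20000 bytes compresses to 1 (i.e. a 32KB buffer),
 * while any size up to the 16KB minimum compresses to 0.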
 */
static u8 smc_compress_bufsize(int size)
{
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	size = (size - 1) >> 14;
	compressed = ilog2(size) + 1;
	if (compressed >= SMC_RMBE_SIZES)
		compressed = SMC_RMBE_SIZES - 1;
	return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
	u32 size;

	size = 0x00000001 << (((int)compressed) + 14);
	return (int)size;
}

/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     struct mutex *lock,
					     struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	mutex_lock(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			mutex_unlock(lock);
			return buf_slot;
		}
	}
	mutex_unlock(lock);
	return NULL;
}

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}

/* map an rmb buf to a link */
static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
			     struct smc_link *lnk)
{
	int rc;

	if (buf_desc->is_map_ib[lnk->link_idx])
		return 0;

	rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
	if (rc)
		return rc;
	sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
		   buf_desc->cpu_addr, buf_desc->len);

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk, buf_desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != 1) {
		rc = -EAGAIN;
		goto free_table;
	}

	/* create a new memory region for the RMB */
	if (is_rmb) {
		rc = smc_ib_get_memory_region(lnk->roce_pd,
					      IB_ACCESS_REMOTE_WRITE |
					      IB_ACCESS_LOCAL_WRITE,
					      buf_desc, lnk->link_idx);
		if (rc)
			goto buf_unmap;
		smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
	}
	buf_desc->is_map_ib[lnk->link_idx] = true;
	return 0;

buf_unmap:
	smc_ib_buf_unmap_sg(lnk, buf_desc,
			    is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
free_table:
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	return rc;
}

/* register a new rmb on IB device,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
{
	if (list_empty(&link->lgr->list))
		return -ENOLINK;
	if (!rmb_desc->is_reg_mr[link->link_idx]) {
		/* register memory region for new rmb */
		if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
			rmb_desc->is_reg_err = true;
			return -EFAULT;
		}
		rmb_desc->is_reg_mr[link->link_idx] = true;
	}
	return 0;
}

static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
			     struct list_head *lst, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf;
	int rc = 0;

	mutex_lock(lock);
	list_for_each_entry_safe(buf_desc, bf, lst, list) {
		if (!buf_desc->used)
			continue;
		rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
		if (rc)
			goto out;
	}
out:
	mutex_unlock(lock);
	return rc;
}

/* map all used buffers of lgr for a new link */
int smcr_buf_map_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i, rc = 0;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
				       &lgr->rmbs[i], true);
		if (rc)
			return rc;
		rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
				       &lgr->sndbufs[i], false);
		if (rc)
			return rc;
	}
	return 0;
}

/* register all used buffers of lgr for a new link,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_buf_reg_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i, rc = 0;

	mutex_lock(&lgr->rmbs_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
			if (!buf_desc->used)
				continue;
			rc = smcr_link_reg_rmb(lnk, buf_desc);
			if (rc)
				goto out;
		}
	}
out:
	mutex_unlock(&lgr->rmbs_lock);
	return rc;
}

static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
						bool is_rmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	buf_desc->order = get_order(bufsize);
	buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
				      __GFP_NOMEMALLOC | __GFP_COMP |
				      __GFP_NORETRY | __GFP_ZERO,
				      buf_desc->order);
	if (!buf_desc->pages) {
		kfree(buf_desc);
		return ERR_PTR(-EAGAIN);
	}
	buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
	buf_desc->len = bufsize;
	return buf_desc;
}

/* map buf_desc on all usable links,
 * unused buffers stay mapped as long as the link is up
 */
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
				     struct smc_buf_desc *buf_desc, bool is_rmb)
{
	int i, rc = 0;

	/* protect against parallel link reconfiguration */
	mutex_lock(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (!smc_link_usable(lnk))
			continue;
		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
			rc = -ENOMEM;
			goto out;
		}
	}
out:
	mutex_unlock(&lgr->llc_conf_mutex);
	return rc;
}

#define SMCD_DMBE_SIZES		7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
						bool is_dmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	int rc;

	if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
		return ERR_PTR(-EAGAIN);

	/* try to alloc a new DMB */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);
	if (is_dmb) {
		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
		if (rc) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
		/* CDC header stored in buf. So, pretend it was smaller */
		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
	} else {
		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
					     __GFP_NOWARN | __GFP_NORETRY |
					     __GFP_NOMEMALLOC);
		if (!buf_desc->cpu_addr) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->len = bufsize;
	}
	return buf_desc;
}

static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_short;
	struct mutex *lock;	/* lock buffer list */
	int sk_buf_size;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_sndbuf / 2;

	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
	     bufsize_short >= 0; bufsize_short--) {

		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_short];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_short];
		}
		bufsize = smc_uncompress_bufsize(bufsize_short);
		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
			continue;

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
		if (buf_desc) {
			memset(buf_desc->cpu_addr, 0, bufsize);
			break; /* found reusable slot */
		}

		if (is_smcd)
			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
		else
			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc))
			continue;

		buf_desc->used = 1;
		mutex_lock(lock);
		list_add(&buf_desc->list, buf_list);
		mutex_unlock(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return -ENOMEM;

	if (!is_smcd) {
		if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
			smcr_buf_unuse(buf_desc, lgr);
			return -ENOMEM;
		}
	}

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_short = bufsize_short;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit =
			smc_rmb_wnd_update_limit(buf_desc->len);
		if (is_smcd)
			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
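		/* the complete send buffer is initially available as space */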
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}

void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
		return;
	smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
		return;
	smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
				       DMA_FROM_DEVICE);
	}
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
					  DMA_FROM_DEVICE);
	}
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group)
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
	int rc;

	/* create send buffer */
	rc = __smc_buf_create(smc, is_smcd, false);
	if (rc)
		return rc;
	/* create rmb */
	rc = __smc_buf_create(smc, is_smcd, true);
	if (rc)
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
	return rc;
}

static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}

static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
				   u32 rkey)
{
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (test_bit(i, lgr->rtokens_used_mask) &&
		    lgr->rtokens[i][lnk_idx].rkey == rkey)
			return i;
	}
	return -ENOENT;
}

/* set rtoken for a new link to an existing rmb */
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
{
	int rtok_idx;

	rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
	if (rtok_idx == -ENOENT)
		return;
	lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
	lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
}

/* set rtoken for a new link whose link_id is given */
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
		     __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	bool found = false;
	int link_idx;

	for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
		if (lgr->lnk[link_idx].link_id == link_id) {
			found = true;
			break;
		}
	}
	if (!found)
		return;
	lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
	lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* already in list */
			return i;
		}
	}
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][lnk->link_idx].rkey = rkey;
	lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
	return i;
}

/* delete an rtoken from all links */
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u32 rkey = ntohl(nw_rkey);
	int i, j;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
				lgr->rtokens[i][j].rkey = 0;
				lgr->rtokens[i][j].dma_addr = 0;
			}
			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_link *lnk,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
					  clc->rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}

static void smc_core_going_away(void)
{
	struct smc_ib_device *smcibdev;
	struct smcd_dev *smcd;

	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
		int i;

		for (i = 0; i < SMC_MAX_PORTS; i++)
			set_bit(i, smcibdev->ports_going_away);
	}
	spin_unlock(&smc_ib_devices.lock);

	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		smcd->going_away = 1;
	}
	spin_unlock(&smcd_dev_list.lock);
}

/* Clean up all SMC link groups */
static void smc_lgrs_shutdown(void)
{
	struct smcd_dev *smcd;

	smc_core_going_away();

	smc_smcr_terminate_all(NULL);

	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(smcd, &smcd_dev_list.list, list)
		smc_smcd_terminate_all(smcd);
	spin_unlock(&smcd_dev_list.lock);
}

static int smc_core_reboot_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	smc_lgrs_shutdown();
	smc_ib_unregister_client();
	return 0;
}

static struct notifier_block smc_reboot_notifier = {
	.notifier_call = smc_core_reboot_event,
};

int __init smc_core_init(void)
{
	return register_reboot_notifier(&smc_reboot_notifier);
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
	unregister_reboot_notifier(&smc_reboot_notifier);
	smc_lgrs_shutdown();
}