1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Basic Transport Functions exploiting Infiniband API 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/socket.h> 13 #include <linux/if_vlan.h> 14 #include <linux/random.h> 15 #include <linux/workqueue.h> 16 #include <linux/wait.h> 17 #include <linux/reboot.h> 18 #include <linux/mutex.h> 19 #include <net/tcp.h> 20 #include <net/sock.h> 21 #include <rdma/ib_verbs.h> 22 #include <rdma/ib_cache.h> 23 24 #include "smc.h" 25 #include "smc_clc.h" 26 #include "smc_core.h" 27 #include "smc_ib.h" 28 #include "smc_wr.h" 29 #include "smc_llc.h" 30 #include "smc_cdc.h" 31 #include "smc_close.h" 32 #include "smc_ism.h" 33 34 #define SMC_LGR_NUM_INCR 256 35 #define SMC_LGR_FREE_DELAY_SERV (600 * HZ) 36 #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ) 37 #define SMC_LGR_FREE_DELAY_FAST (8 * HZ) 38 39 static struct smc_lgr_list smc_lgr_list = { /* established link groups */ 40 .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), 41 .list = LIST_HEAD_INIT(smc_lgr_list.list), 42 .num = 0, 43 }; 44 45 static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */ 46 static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted); 47 48 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, 49 struct smc_buf_desc *buf_desc); 50 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft); 51 52 static void smc_link_down_work(struct work_struct *work); 53 54 /* return head of link group list and its lock for a given link group */ 55 static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr, 56 spinlock_t **lgr_lock) 57 { 58 if (lgr->is_smcd) { 59 *lgr_lock = &lgr->smcd->lgr_lock; 60 return &lgr->smcd->lgr_list; 61 } 62 63 *lgr_lock = &smc_lgr_list.lock; 64 return &smc_lgr_list.list; 65 } 66 67 static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) 68 { 69 /* client link group creation always follows the server link group 70 * creation. For client use a somewhat higher removal delay time, 71 * otherwise there is a risk of out-of-sync link groups. 72 */ 73 if (!lgr->freeing && !lgr->freefast) { 74 mod_delayed_work(system_wq, &lgr->free_work, 75 (!lgr->is_smcd && lgr->role == SMC_CLNT) ? 76 SMC_LGR_FREE_DELAY_CLNT : 77 SMC_LGR_FREE_DELAY_SERV); 78 } 79 } 80 81 void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr) 82 { 83 if (!lgr->freeing && !lgr->freefast) { 84 lgr->freefast = 1; 85 mod_delayed_work(system_wq, &lgr->free_work, 86 SMC_LGR_FREE_DELAY_FAST); 87 } 88 } 89 90 /* Register connection's alert token in our lookup structure. 91 * To use rbtrees we have to implement our own insert core. 92 * Requires @conns_lock 93 * @smc connection to register 94 * Returns 0 on success, != otherwise. 95 */ 96 static void smc_lgr_add_alert_token(struct smc_connection *conn) 97 { 98 struct rb_node **link, *parent = NULL; 99 u32 token = conn->alert_token_local; 100 101 link = &conn->lgr->conns_all.rb_node; 102 while (*link) { 103 struct smc_connection *cur = rb_entry(*link, 104 struct smc_connection, alert_node); 105 106 parent = *link; 107 if (cur->alert_token_local > token) 108 link = &parent->rb_left; 109 else 110 link = &parent->rb_right; 111 } 112 /* Put the new node there */ 113 rb_link_node(&conn->alert_node, parent, link); 114 rb_insert_color(&conn->alert_node, &conn->lgr->conns_all); 115 } 116 117 /* assign an SMC-R link to the connection */ 118 static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first) 119 { 120 enum smc_link_state expected = first ? SMC_LNK_ACTIVATING : 121 SMC_LNK_ACTIVE; 122 int i, j; 123 124 /* do link balancing */ 125 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { 126 struct smc_link *lnk = &conn->lgr->lnk[i]; 127 128 if (lnk->state != expected || lnk->link_is_asym) 129 continue; 130 if (conn->lgr->role == SMC_CLNT) { 131 conn->lnk = lnk; /* temporary, SMC server assigns link*/ 132 break; 133 } 134 if (conn->lgr->conns_num % 2) { 135 for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) { 136 struct smc_link *lnk2; 137 138 lnk2 = &conn->lgr->lnk[j]; 139 if (lnk2->state == expected && 140 !lnk2->link_is_asym) { 141 conn->lnk = lnk2; 142 break; 143 } 144 } 145 } 146 if (!conn->lnk) 147 conn->lnk = lnk; 148 break; 149 } 150 if (!conn->lnk) 151 return SMC_CLC_DECL_NOACTLINK; 152 return 0; 153 } 154 155 /* Register connection in link group by assigning an alert token 156 * registered in a search tree. 157 * Requires @conns_lock 158 * Note that '0' is a reserved value and not assigned. 159 */ 160 static int smc_lgr_register_conn(struct smc_connection *conn, bool first) 161 { 162 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 163 static atomic_t nexttoken = ATOMIC_INIT(0); 164 int rc; 165 166 if (!conn->lgr->is_smcd) { 167 rc = smcr_lgr_conn_assign_link(conn, first); 168 if (rc) 169 return rc; 170 } 171 /* find a new alert_token_local value not yet used by some connection 172 * in this link group 173 */ 174 sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */ 175 while (!conn->alert_token_local) { 176 conn->alert_token_local = atomic_inc_return(&nexttoken); 177 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr)) 178 conn->alert_token_local = 0; 179 } 180 smc_lgr_add_alert_token(conn); 181 conn->lgr->conns_num++; 182 return 0; 183 } 184 185 /* Unregister connection and reset the alert token of the given connection< 186 */ 187 static void __smc_lgr_unregister_conn(struct smc_connection *conn) 188 { 189 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 190 struct smc_link_group *lgr = conn->lgr; 191 192 rb_erase(&conn->alert_node, &lgr->conns_all); 193 lgr->conns_num--; 194 conn->alert_token_local = 0; 195 sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ 196 } 197 198 /* Unregister connection from lgr 199 */ 200 static void smc_lgr_unregister_conn(struct smc_connection *conn) 201 { 202 struct smc_link_group *lgr = conn->lgr; 203 204 if (!lgr) 205 return; 206 write_lock_bh(&lgr->conns_lock); 207 if (conn->alert_token_local) { 208 __smc_lgr_unregister_conn(conn); 209 } 210 write_unlock_bh(&lgr->conns_lock); 211 conn->lgr = NULL; 212 } 213 214 void smc_lgr_cleanup_early(struct smc_connection *conn) 215 { 216 struct smc_link_group *lgr = conn->lgr; 217 struct list_head *lgr_list; 218 spinlock_t *lgr_lock; 219 220 if (!lgr) 221 return; 222 223 smc_conn_free(conn); 224 lgr_list = smc_lgr_list_head(lgr, &lgr_lock); 225 spin_lock_bh(lgr_lock); 226 /* do not use this link group for new connections */ 227 if (!list_empty(lgr_list)) 228 list_del_init(lgr_list); 229 spin_unlock_bh(lgr_lock); 230 smc_lgr_schedule_free_work_fast(lgr); 231 } 232 233 static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) 234 { 235 int i; 236 237 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { 238 struct smc_link *lnk = &lgr->lnk[i]; 239 240 if (smc_link_usable(lnk)) 241 lnk->state = SMC_LNK_INACTIVE; 242 } 243 wake_up_all(&lgr->llc_msg_waiter); 244 wake_up_all(&lgr->llc_flow_waiter); 245 } 246 247 static void smc_lgr_free(struct smc_link_group *lgr); 248 249 static void smc_lgr_free_work(struct work_struct *work) 250 { 251 struct smc_link_group *lgr = container_of(to_delayed_work(work), 252 struct smc_link_group, 253 free_work); 254 spinlock_t *lgr_lock; 255 bool conns; 256 257 smc_lgr_list_head(lgr, &lgr_lock); 258 spin_lock_bh(lgr_lock); 259 if (lgr->freeing) { 260 spin_unlock_bh(lgr_lock); 261 return; 262 } 263 read_lock_bh(&lgr->conns_lock); 264 conns = RB_EMPTY_ROOT(&lgr->conns_all); 265 read_unlock_bh(&lgr->conns_lock); 266 if (!conns) { /* number of lgr connections is no longer zero */ 267 spin_unlock_bh(lgr_lock); 268 return; 269 } 270 list_del_init(&lgr->list); /* remove from smc_lgr_list */ 271 lgr->freeing = 1; /* this instance does the freeing, no new schedule */ 272 spin_unlock_bh(lgr_lock); 273 cancel_delayed_work(&lgr->free_work); 274 275 if (!lgr->is_smcd && !lgr->terminating) 276 smc_llc_send_link_delete_all(lgr, true, 277 SMC_LLC_DEL_PROG_INIT_TERM); 278 if (lgr->is_smcd && !lgr->terminating) 279 smc_ism_signal_shutdown(lgr); 280 if (!lgr->is_smcd) 281 smcr_lgr_link_deactivate_all(lgr); 282 smc_lgr_free(lgr); 283 } 284 285 static void smc_lgr_terminate_work(struct work_struct *work) 286 { 287 struct smc_link_group *lgr = container_of(work, struct smc_link_group, 288 terminate_work); 289 290 __smc_lgr_terminate(lgr, true); 291 } 292 293 /* return next unique link id for the lgr */ 294 static u8 smcr_next_link_id(struct smc_link_group *lgr) 295 { 296 u8 link_id; 297 int i; 298 299 while (1) { 300 link_id = ++lgr->next_link_id; 301 if (!link_id) /* skip zero as link_id */ 302 link_id = ++lgr->next_link_id; 303 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { 304 if (smc_link_usable(&lgr->lnk[i]) && 305 lgr->lnk[i].link_id == link_id) 306 continue; 307 } 308 break; 309 } 310 return link_id; 311 } 312 313 int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, 314 u8 link_idx, struct smc_init_info *ini) 315 { 316 u8 rndvec[3]; 317 int rc; 318 319 get_device(&ini->ib_dev->ibdev->dev); 320 atomic_inc(&ini->ib_dev->lnk_cnt); 321 lnk->link_id = smcr_next_link_id(lgr); 322 lnk->lgr = lgr; 323 lnk->link_idx = link_idx; 324 lnk->smcibdev = ini->ib_dev; 325 lnk->ibport = ini->ib_port; 326 lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; 327 smc_llc_link_set_uid(lnk); 328 INIT_WORK(&lnk->link_down_wrk, smc_link_down_work); 329 if (!ini->ib_dev->initialized) { 330 rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev); 331 if (rc) 332 goto out; 333 } 334 get_random_bytes(rndvec, sizeof(rndvec)); 335 lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + 336 (rndvec[2] << 16); 337 rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport, 338 ini->vlan_id, lnk->gid, &lnk->sgid_index); 339 if (rc) 340 goto out; 341 rc = smc_llc_link_init(lnk); 342 if (rc) 343 goto out; 344 rc = smc_wr_alloc_link_mem(lnk); 345 if (rc) 346 goto clear_llc_lnk; 347 rc = smc_ib_create_protection_domain(lnk); 348 if (rc) 349 goto free_link_mem; 350 rc = smc_ib_create_queue_pair(lnk); 351 if (rc) 352 goto dealloc_pd; 353 rc = smc_wr_create_link(lnk); 354 if (rc) 355 goto destroy_qp; 356 lnk->state = SMC_LNK_ACTIVATING; 357 return 0; 358 359 destroy_qp: 360 smc_ib_destroy_queue_pair(lnk); 361 dealloc_pd: 362 smc_ib_dealloc_protection_domain(lnk); 363 free_link_mem: 364 smc_wr_free_link_mem(lnk); 365 clear_llc_lnk: 366 smc_llc_link_clear(lnk, false); 367 out: 368 put_device(&ini->ib_dev->ibdev->dev); 369 memset(lnk, 0, sizeof(struct smc_link)); 370 lnk->state = SMC_LNK_UNUSED; 371 if (!atomic_dec_return(&ini->ib_dev->lnk_cnt)) 372 wake_up(&ini->ib_dev->lnks_deleted); 373 return rc; 374 } 375 376 /* create a new SMC link group */ 377 static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) 378 { 379 struct smc_link_group *lgr; 380 struct list_head *lgr_list; 381 struct smc_link *lnk; 382 spinlock_t *lgr_lock; 383 u8 link_idx; 384 int rc = 0; 385 int i; 386 387 if (ini->is_smcd && ini->vlan_id) { 388 if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) { 389 rc = SMC_CLC_DECL_ISMVLANERR; 390 goto out; 391 } 392 } 393 394 lgr = kzalloc(sizeof(*lgr), GFP_KERNEL); 395 if (!lgr) { 396 rc = SMC_CLC_DECL_MEM; 397 goto ism_put_vlan; 398 } 399 lgr->is_smcd = ini->is_smcd; 400 lgr->sync_err = 0; 401 lgr->terminating = 0; 402 lgr->freefast = 0; 403 lgr->freeing = 0; 404 lgr->vlan_id = ini->vlan_id; 405 mutex_init(&lgr->sndbufs_lock); 406 mutex_init(&lgr->rmbs_lock); 407 rwlock_init(&lgr->conns_lock); 408 for (i = 0; i < SMC_RMBE_SIZES; i++) { 409 INIT_LIST_HEAD(&lgr->sndbufs[i]); 410 INIT_LIST_HEAD(&lgr->rmbs[i]); 411 } 412 lgr->next_link_id = 0; 413 smc_lgr_list.num += SMC_LGR_NUM_INCR; 414 memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE); 415 INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); 416 INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work); 417 lgr->conns_all = RB_ROOT; 418 if (ini->is_smcd) { 419 /* SMC-D specific settings */ 420 get_device(&ini->ism_dev->dev); 421 lgr->peer_gid = ini->ism_gid; 422 lgr->smcd = ini->ism_dev; 423 lgr_list = &ini->ism_dev->lgr_list; 424 lgr_lock = &lgr->smcd->lgr_lock; 425 lgr->peer_shutdown = 0; 426 atomic_inc(&ini->ism_dev->lgr_cnt); 427 } else { 428 /* SMC-R specific settings */ 429 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; 430 memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer, 431 SMC_SYSTEMID_LEN); 432 memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1], 433 SMC_MAX_PNETID_LEN); 434 smc_llc_lgr_init(lgr, smc); 435 436 link_idx = SMC_SINGLE_LINK; 437 lnk = &lgr->lnk[link_idx]; 438 rc = smcr_link_init(lgr, lnk, link_idx, ini); 439 if (rc) 440 goto free_lgr; 441 lgr_list = &smc_lgr_list.list; 442 lgr_lock = &smc_lgr_list.lock; 443 atomic_inc(&lgr_cnt); 444 } 445 smc->conn.lgr = lgr; 446 spin_lock_bh(lgr_lock); 447 list_add_tail(&lgr->list, lgr_list); 448 spin_unlock_bh(lgr_lock); 449 return 0; 450 451 free_lgr: 452 kfree(lgr); 453 ism_put_vlan: 454 if (ini->is_smcd && ini->vlan_id) 455 smc_ism_put_vlan(ini->ism_dev, ini->vlan_id); 456 out: 457 if (rc < 0) { 458 if (rc == -ENOMEM) 459 rc = SMC_CLC_DECL_MEM; 460 else 461 rc = SMC_CLC_DECL_INTERR; 462 } 463 return rc; 464 } 465 466 static int smc_write_space(struct smc_connection *conn) 467 { 468 int buffer_len = conn->peer_rmbe_size; 469 union smc_host_cursor prod; 470 union smc_host_cursor cons; 471 int space; 472 473 smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn); 474 smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn); 475 /* determine rx_buf space */ 476 space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod); 477 return space; 478 } 479 480 static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend, 481 struct smc_wr_buf *wr_buf) 482 { 483 struct smc_connection *conn = &smc->conn; 484 union smc_host_cursor cons, fin; 485 int rc = 0; 486 int diff; 487 488 smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn); 489 smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn); 490 /* set prod cursor to old state, enforce tx_rdma_writes() */ 491 smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn); 492 smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn); 493 494 if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) { 495 /* cons cursor advanced more than fin, and prod was set 496 * fin above, so now prod is smaller than cons. Fix that. 497 */ 498 diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons); 499 smc_curs_add(conn->sndbuf_desc->len, 500 &conn->tx_curs_sent, diff); 501 smc_curs_add(conn->sndbuf_desc->len, 502 &conn->tx_curs_fin, diff); 503 504 smp_mb__before_atomic(); 505 atomic_add(diff, &conn->sndbuf_space); 506 smp_mb__after_atomic(); 507 508 smc_curs_add(conn->peer_rmbe_size, 509 &conn->local_tx_ctrl.prod, diff); 510 smc_curs_add(conn->peer_rmbe_size, 511 &conn->local_tx_ctrl_fin, diff); 512 } 513 /* recalculate, value is used by tx_rdma_writes() */ 514 atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn)); 515 516 if (smc->sk.sk_state != SMC_INIT && 517 smc->sk.sk_state != SMC_CLOSED) { 518 rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf); 519 if (!rc) { 520 schedule_delayed_work(&conn->tx_work, 0); 521 smc->sk.sk_data_ready(&smc->sk); 522 } 523 } else { 524 smc_wr_tx_put_slot(conn->lnk, 525 (struct smc_wr_tx_pend_priv *)pend); 526 } 527 return rc; 528 } 529 530 struct smc_link *smc_switch_conns(struct smc_link_group *lgr, 531 struct smc_link *from_lnk, bool is_dev_err) 532 { 533 struct smc_link *to_lnk = NULL; 534 struct smc_cdc_tx_pend *pend; 535 struct smc_connection *conn; 536 struct smc_wr_buf *wr_buf; 537 struct smc_sock *smc; 538 struct rb_node *node; 539 int i, rc = 0; 540 541 /* link is inactive, wake up tx waiters */ 542 smc_wr_wakeup_tx_wait(from_lnk); 543 544 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { 545 if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx) 546 continue; 547 if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev && 548 from_lnk->ibport == lgr->lnk[i].ibport) { 549 continue; 550 } 551 to_lnk = &lgr->lnk[i]; 552 break; 553 } 554 if (!to_lnk) { 555 smc_lgr_terminate_sched(lgr); 556 return NULL; 557 } 558 again: 559 read_lock_bh(&lgr->conns_lock); 560 for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) { 561 conn = rb_entry(node, struct smc_connection, alert_node); 562 if (conn->lnk != from_lnk) 563 continue; 564 smc = container_of(conn, struct smc_sock, conn); 565 /* conn->lnk not yet set in SMC_INIT state */ 566 if (smc->sk.sk_state == SMC_INIT) 567 continue; 568 if (smc->sk.sk_state == SMC_CLOSED || 569 smc->sk.sk_state == SMC_PEERCLOSEWAIT1 || 570 smc->sk.sk_state == SMC_PEERCLOSEWAIT2 || 571 smc->sk.sk_state == SMC_APPFINCLOSEWAIT || 572 smc->sk.sk_state == SMC_APPCLOSEWAIT1 || 573 smc->sk.sk_state == SMC_APPCLOSEWAIT2 || 574 smc->sk.sk_state == SMC_PEERFINCLOSEWAIT || 575 smc->sk.sk_state == SMC_PEERABORTWAIT || 576 smc->sk.sk_state == SMC_PROCESSABORT) { 577 spin_lock_bh(&conn->send_lock); 578 conn->lnk = to_lnk; 579 spin_unlock_bh(&conn->send_lock); 580 continue; 581 } 582 sock_hold(&smc->sk); 583 read_unlock_bh(&lgr->conns_lock); 584 /* pre-fetch buffer outside of send_lock, might sleep */ 585 rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend); 586 if (rc) { 587 smcr_link_down_cond_sched(to_lnk); 588 return NULL; 589 } 590 /* avoid race with smcr_tx_sndbuf_nonempty() */ 591 spin_lock_bh(&conn->send_lock); 592 conn->lnk = to_lnk; 593 rc = smc_switch_cursor(smc, pend, wr_buf); 594 spin_unlock_bh(&conn->send_lock); 595 sock_put(&smc->sk); 596 if (rc) { 597 smcr_link_down_cond_sched(to_lnk); 598 return NULL; 599 } 600 goto again; 601 } 602 read_unlock_bh(&lgr->conns_lock); 603 return to_lnk; 604 } 605 606 static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc, 607 struct smc_link_group *lgr) 608 { 609 int rc; 610 611 if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) { 612 /* unregister rmb with peer */ 613 rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY); 614 if (!rc) { 615 /* protect against smc_llc_cli_rkey_exchange() */ 616 mutex_lock(&lgr->llc_conf_mutex); 617 smc_llc_do_delete_rkey(lgr, rmb_desc); 618 rmb_desc->is_conf_rkey = false; 619 mutex_unlock(&lgr->llc_conf_mutex); 620 smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); 621 } 622 } 623 624 if (rmb_desc->is_reg_err) { 625 /* buf registration failed, reuse not possible */ 626 mutex_lock(&lgr->rmbs_lock); 627 list_del(&rmb_desc->list); 628 mutex_unlock(&lgr->rmbs_lock); 629 630 smc_buf_free(lgr, true, rmb_desc); 631 } else { 632 rmb_desc->used = 0; 633 } 634 } 635 636 static void smc_buf_unuse(struct smc_connection *conn, 637 struct smc_link_group *lgr) 638 { 639 if (conn->sndbuf_desc) 640 conn->sndbuf_desc->used = 0; 641 if (conn->rmb_desc && lgr->is_smcd) 642 conn->rmb_desc->used = 0; 643 else if (conn->rmb_desc) 644 smcr_buf_unuse(conn->rmb_desc, lgr); 645 } 646 647 /* remove a finished connection from its link group */ 648 void smc_conn_free(struct smc_connection *conn) 649 { 650 struct smc_link_group *lgr = conn->lgr; 651 652 if (!lgr) 653 return; 654 if (lgr->is_smcd) { 655 if (!list_empty(&lgr->list)) 656 smc_ism_unset_conn(conn); 657 tasklet_kill(&conn->rx_tsklet); 658 } else { 659 smc_cdc_tx_dismiss_slots(conn); 660 if (current_work() != &conn->abort_work) 661 cancel_work_sync(&conn->abort_work); 662 } 663 if (!list_empty(&lgr->list)) { 664 smc_lgr_unregister_conn(conn); 665 smc_buf_unuse(conn, lgr); /* allow buffer reuse */ 666 } 667 668 if (!lgr->conns_num) 669 smc_lgr_schedule_free_work(lgr); 670 } 671 672 /* unregister a link from a buf_desc */ 673 static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb, 674 struct smc_link *lnk) 675 { 676 if (is_rmb) 677 buf_desc->is_reg_mr[lnk->link_idx] = false; 678 if (!buf_desc->is_map_ib[lnk->link_idx]) 679 return; 680 if (is_rmb) { 681 if (buf_desc->mr_rx[lnk->link_idx]) { 682 smc_ib_put_memory_region( 683 buf_desc->mr_rx[lnk->link_idx]); 684 buf_desc->mr_rx[lnk->link_idx] = NULL; 685 } 686 smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE); 687 } else { 688 smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE); 689 } 690 sg_free_table(&buf_desc->sgt[lnk->link_idx]); 691 buf_desc->is_map_ib[lnk->link_idx] = false; 692 } 693 694 /* unmap all buffers of lgr for a deleted link */ 695 static void smcr_buf_unmap_lgr(struct smc_link *lnk) 696 { 697 struct smc_link_group *lgr = lnk->lgr; 698 struct smc_buf_desc *buf_desc, *bf; 699 int i; 700 701 for (i = 0; i < SMC_RMBE_SIZES; i++) { 702 mutex_lock(&lgr->rmbs_lock); 703 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) 704 smcr_buf_unmap_link(buf_desc, true, lnk); 705 mutex_unlock(&lgr->rmbs_lock); 706 mutex_lock(&lgr->sndbufs_lock); 707 list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], 708 list) 709 smcr_buf_unmap_link(buf_desc, false, lnk); 710 mutex_unlock(&lgr->sndbufs_lock); 711 } 712 } 713 714 static void smcr_rtoken_clear_link(struct smc_link *lnk) 715 { 716 struct smc_link_group *lgr = lnk->lgr; 717 int i; 718 719 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { 720 lgr->rtokens[i][lnk->link_idx].rkey = 0; 721 lgr->rtokens[i][lnk->link_idx].dma_addr = 0; 722 } 723 } 724 725 /* must be called under lgr->llc_conf_mutex lock */ 726 void smcr_link_clear(struct smc_link *lnk, bool log) 727 { 728 struct smc_ib_device *smcibdev; 729 730 if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED) 731 return; 732 lnk->peer_qpn = 0; 733 smc_llc_link_clear(lnk, log); 734 smcr_buf_unmap_lgr(lnk); 735 smcr_rtoken_clear_link(lnk); 736 smc_ib_modify_qp_reset(lnk); 737 smc_wr_free_link(lnk); 738 smc_ib_destroy_queue_pair(lnk); 739 smc_ib_dealloc_protection_domain(lnk); 740 smc_wr_free_link_mem(lnk); 741 put_device(&lnk->smcibdev->ibdev->dev); 742 smcibdev = lnk->smcibdev; 743 memset(lnk, 0, sizeof(struct smc_link)); 744 lnk->state = SMC_LNK_UNUSED; 745 if (!atomic_dec_return(&smcibdev->lnk_cnt)) 746 wake_up(&smcibdev->lnks_deleted); 747 } 748 749 static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, 750 struct smc_buf_desc *buf_desc) 751 { 752 int i; 753 754 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) 755 smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]); 756 757 if (buf_desc->pages) 758 __free_pages(buf_desc->pages, buf_desc->order); 759 kfree(buf_desc); 760 } 761 762 static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb, 763 struct smc_buf_desc *buf_desc) 764 { 765 if (is_dmb) { 766 /* restore original buf len */ 767 buf_desc->len += sizeof(struct smcd_cdc_msg); 768 smc_ism_unregister_dmb(lgr->smcd, buf_desc); 769 } else { 770 kfree(buf_desc->cpu_addr); 771 } 772 kfree(buf_desc); 773 } 774 775 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, 776 struct smc_buf_desc *buf_desc) 777 { 778 if (lgr->is_smcd) 779 smcd_buf_free(lgr, is_rmb, buf_desc); 780 else 781 smcr_buf_free(lgr, is_rmb, buf_desc); 782 } 783 784 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) 785 { 786 struct smc_buf_desc *buf_desc, *bf_desc; 787 struct list_head *buf_list; 788 int i; 789 790 for (i = 0; i < SMC_RMBE_SIZES; i++) { 791 if (is_rmb) 792 buf_list = &lgr->rmbs[i]; 793 else 794 buf_list = &lgr->sndbufs[i]; 795 list_for_each_entry_safe(buf_desc, bf_desc, buf_list, 796 list) { 797 list_del(&buf_desc->list); 798 smc_buf_free(lgr, is_rmb, buf_desc); 799 } 800 } 801 } 802 803 static void smc_lgr_free_bufs(struct smc_link_group *lgr) 804 { 805 /* free send buffers */ 806 __smc_lgr_free_bufs(lgr, false); 807 /* free rmbs */ 808 __smc_lgr_free_bufs(lgr, true); 809 } 810 811 /* remove a link group */ 812 static void smc_lgr_free(struct smc_link_group *lgr) 813 { 814 int i; 815 816 if (!lgr->is_smcd) { 817 mutex_lock(&lgr->llc_conf_mutex); 818 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { 819 if (lgr->lnk[i].state != SMC_LNK_UNUSED) 820 smcr_link_clear(&lgr->lnk[i], false); 821 } 822 mutex_unlock(&lgr->llc_conf_mutex); 823 smc_llc_lgr_clear(lgr); 824 } 825 826 smc_lgr_free_bufs(lgr); 827 if (lgr->is_smcd) { 828 if (!lgr->terminating) { 829 smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); 830 put_device(&lgr->smcd->dev); 831 } 832 if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) 833 wake_up(&lgr->smcd->lgrs_deleted); 834 } else { 835 if (!atomic_dec_return(&lgr_cnt)) 836 wake_up(&lgrs_deleted); 837 } 838 kfree(lgr); 839 } 840 841 static void smcd_unregister_all_dmbs(struct smc_link_group *lgr) 842 { 843 int i; 844 845 for (i = 0; i < SMC_RMBE_SIZES; i++) { 846 struct smc_buf_desc *buf_desc; 847 848 list_for_each_entry(buf_desc, &lgr->rmbs[i], list) { 849 buf_desc->len += sizeof(struct smcd_cdc_msg); 850 smc_ism_unregister_dmb(lgr->smcd, buf_desc); 851 } 852 } 853 } 854 855 static void smc_sk_wake_ups(struct smc_sock *smc) 856 { 857 smc->sk.sk_write_space(&smc->sk); 858 smc->sk.sk_data_ready(&smc->sk); 859 smc->sk.sk_state_change(&smc->sk); 860 } 861 862 /* kill a connection */ 863 static void smc_conn_kill(struct smc_connection *conn, bool soft) 864 { 865 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 866 867 if (conn->lgr->is_smcd && conn->lgr->peer_shutdown) 868 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 869 else 870 smc_close_abort(conn); 871 conn->killed = 1; 872 smc->sk.sk_err = ECONNABORTED; 873 smc_sk_wake_ups(smc); 874 if (conn->lgr->is_smcd) { 875 smc_ism_unset_conn(conn); 876 if (soft) 877 tasklet_kill(&conn->rx_tsklet); 878 else 879 tasklet_unlock_wait(&conn->rx_tsklet); 880 } else { 881 smc_cdc_tx_dismiss_slots(conn); 882 } 883 smc_lgr_unregister_conn(conn); 884 smc_close_active_abort(smc); 885 } 886 887 static void smc_lgr_cleanup(struct smc_link_group *lgr) 888 { 889 if (lgr->is_smcd) { 890 smc_ism_signal_shutdown(lgr); 891 smcd_unregister_all_dmbs(lgr); 892 smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); 893 put_device(&lgr->smcd->dev); 894 } else { 895 u32 rsn = lgr->llc_termination_rsn; 896 897 if (!rsn) 898 rsn = SMC_LLC_DEL_PROG_INIT_TERM; 899 smc_llc_send_link_delete_all(lgr, false, rsn); 900 smcr_lgr_link_deactivate_all(lgr); 901 } 902 } 903 904 /* terminate link group 905 * @soft: true if link group shutdown can take its time 906 * false if immediate link group shutdown is required 907 */ 908 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) 909 { 910 struct smc_connection *conn; 911 struct smc_sock *smc; 912 struct rb_node *node; 913 914 if (lgr->terminating) 915 return; /* lgr already terminating */ 916 /* cancel free_work sync, will terminate when lgr->freeing is set */ 917 cancel_delayed_work_sync(&lgr->free_work); 918 lgr->terminating = 1; 919 920 /* kill remaining link group connections */ 921 read_lock_bh(&lgr->conns_lock); 922 node = rb_first(&lgr->conns_all); 923 while (node) { 924 read_unlock_bh(&lgr->conns_lock); 925 conn = rb_entry(node, struct smc_connection, alert_node); 926 smc = container_of(conn, struct smc_sock, conn); 927 sock_hold(&smc->sk); /* sock_put below */ 928 lock_sock(&smc->sk); 929 smc_conn_kill(conn, soft); 930 release_sock(&smc->sk); 931 sock_put(&smc->sk); /* sock_hold above */ 932 read_lock_bh(&lgr->conns_lock); 933 node = rb_first(&lgr->conns_all); 934 } 935 read_unlock_bh(&lgr->conns_lock); 936 smc_lgr_cleanup(lgr); 937 smc_lgr_free(lgr); 938 } 939 940 /* unlink link group and schedule termination */ 941 void smc_lgr_terminate_sched(struct smc_link_group *lgr) 942 { 943 spinlock_t *lgr_lock; 944 945 smc_lgr_list_head(lgr, &lgr_lock); 946 spin_lock_bh(lgr_lock); 947 if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) { 948 spin_unlock_bh(lgr_lock); 949 return; /* lgr already terminating */ 950 } 951 list_del_init(&lgr->list); 952 lgr->freeing = 1; 953 spin_unlock_bh(lgr_lock); 954 schedule_work(&lgr->terminate_work); 955 } 956 957 /* Called when peer lgr shutdown (regularly or abnormally) is received */ 958 void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) 959 { 960 struct smc_link_group *lgr, *l; 961 LIST_HEAD(lgr_free_list); 962 963 /* run common cleanup function and build free list */ 964 spin_lock_bh(&dev->lgr_lock); 965 list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) { 966 if ((!peer_gid || lgr->peer_gid == peer_gid) && 967 (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) { 968 if (peer_gid) /* peer triggered termination */ 969 lgr->peer_shutdown = 1; 970 list_move(&lgr->list, &lgr_free_list); 971 lgr->freeing = 1; 972 } 973 } 974 spin_unlock_bh(&dev->lgr_lock); 975 976 /* cancel the regular free workers and actually free lgrs */ 977 list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { 978 list_del_init(&lgr->list); 979 schedule_work(&lgr->terminate_work); 980 } 981 } 982 983 /* Called when an SMCD device is removed or the smc module is unloaded */ 984 void smc_smcd_terminate_all(struct smcd_dev *smcd) 985 { 986 struct smc_link_group *lgr, *lg; 987 LIST_HEAD(lgr_free_list); 988 989 spin_lock_bh(&smcd->lgr_lock); 990 list_splice_init(&smcd->lgr_list, &lgr_free_list); 991 list_for_each_entry(lgr, &lgr_free_list, list) 992 lgr->freeing = 1; 993 spin_unlock_bh(&smcd->lgr_lock); 994 995 list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) { 996 list_del_init(&lgr->list); 997 __smc_lgr_terminate(lgr, false); 998 } 999 1000 if (atomic_read(&smcd->lgr_cnt)) 1001 wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt)); 1002 } 1003 1004 /* Called when an SMCR device is removed or the smc module is unloaded. 1005 * If smcibdev is given, all SMCR link groups using this device are terminated. 1006 * If smcibdev is NULL, all SMCR link groups are terminated. 1007 */ 1008 void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) 1009 { 1010 struct smc_link_group *lgr, *lg; 1011 LIST_HEAD(lgr_free_list); 1012 int i; 1013 1014 spin_lock_bh(&smc_lgr_list.lock); 1015 if (!smcibdev) { 1016 list_splice_init(&smc_lgr_list.list, &lgr_free_list); 1017 list_for_each_entry(lgr, &lgr_free_list, list) 1018 lgr->freeing = 1; 1019 } else { 1020 list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) { 1021 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { 1022 if (lgr->lnk[i].smcibdev == smcibdev) 1023 smcr_link_down_cond_sched(&lgr->lnk[i]); 1024 } 1025 } 1026 } 1027 spin_unlock_bh(&smc_lgr_list.lock); 1028 1029 list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) { 1030 list_del_init(&lgr->list); 1031 smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM); 1032 __smc_lgr_terminate(lgr, false); 1033 } 1034 1035 if (smcibdev) { 1036 if (atomic_read(&smcibdev->lnk_cnt)) 1037 wait_event(smcibdev->lnks_deleted, 1038 !atomic_read(&smcibdev->lnk_cnt)); 1039 } else { 1040 if (atomic_read(&lgr_cnt)) 1041 wait_event(lgrs_deleted, !atomic_read(&lgr_cnt)); 1042 } 1043 } 1044 1045 /* set new lgr type and clear all asymmetric link tagging */ 1046 void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type) 1047 { 1048 char *lgr_type = ""; 1049 int i; 1050 1051 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) 1052 if (smc_link_usable(&lgr->lnk[i])) 1053 lgr->lnk[i].link_is_asym = false; 1054 if (lgr->type == new_type) 1055 return; 1056 lgr->type = new_type; 1057 1058 switch (lgr->type) { 1059 case SMC_LGR_NONE: 1060 lgr_type = "NONE"; 1061 break; 1062 case SMC_LGR_SINGLE: 1063 lgr_type = "SINGLE"; 1064 break; 1065 case SMC_LGR_SYMMETRIC: 1066 lgr_type = "SYMMETRIC"; 1067 break; 1068 case SMC_LGR_ASYMMETRIC_PEER: 1069 lgr_type = "ASYMMETRIC_PEER"; 1070 break; 1071 case SMC_LGR_ASYMMETRIC_LOCAL: 1072 lgr_type = "ASYMMETRIC_LOCAL"; 1073 break; 1074 } 1075 pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: " 1076 "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id, 1077 lgr_type, lgr->pnet_id); 1078 } 1079 1080 /* set new lgr type and tag a link as asymmetric */ 1081 void smcr_lgr_set_type_asym(struct smc_link_group *lgr, 1082 enum smc_lgr_type new_type, int asym_lnk_idx) 1083 { 1084 smcr_lgr_set_type(lgr, new_type); 1085 lgr->lnk[asym_lnk_idx].link_is_asym = true; 1086 } 1087 1088 /* abort connection, abort_work scheduled from tasklet context */ 1089 static void smc_conn_abort_work(struct work_struct *work) 1090 { 1091 struct smc_connection *conn = container_of(work, 1092 struct smc_connection, 1093 abort_work); 1094 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 1095 1096 smc_conn_kill(conn, true); 1097 sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */ 1098 } 1099 1100 void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport) 1101 { 1102 struct smc_link_group *lgr, *n; 1103 1104 list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) { 1105 struct smc_link *link; 1106 1107 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, 1108 SMC_MAX_PNETID_LEN) || 1109 lgr->type == SMC_LGR_SYMMETRIC || 1110 lgr->type == SMC_LGR_ASYMMETRIC_PEER) 1111 continue; 1112 1113 /* trigger local add link processing */ 1114 link = smc_llc_usable_link(lgr); 1115 if (link) 1116 smc_llc_add_link_local(link); 1117 } 1118 } 1119 1120 /* link is down - switch connections to alternate link, 1121 * must be called under lgr->llc_conf_mutex lock 1122 */ 1123 static void smcr_link_down(struct smc_link *lnk) 1124 { 1125 struct smc_link_group *lgr = lnk->lgr; 1126 struct smc_link *to_lnk; 1127 int del_link_id; 1128 1129 if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list)) 1130 return; 1131 1132 smc_ib_modify_qp_reset(lnk); 1133 to_lnk = smc_switch_conns(lgr, lnk, true); 1134 if (!to_lnk) { /* no backup link available */ 1135 smcr_link_clear(lnk, true); 1136 return; 1137 } 1138 smcr_lgr_set_type(lgr, SMC_LGR_SINGLE); 1139 del_link_id = lnk->link_id; 1140 1141 if (lgr->role == SMC_SERV) { 1142 /* trigger local delete link processing */ 1143 smc_llc_srv_delete_link_local(to_lnk, del_link_id); 1144 } else { 1145 if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { 1146 /* another llc task is ongoing */ 1147 mutex_unlock(&lgr->llc_conf_mutex); 1148 wait_event_timeout(lgr->llc_flow_waiter, 1149 (list_empty(&lgr->list) || 1150 lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), 1151 SMC_LLC_WAIT_TIME); 1152 mutex_lock(&lgr->llc_conf_mutex); 1153 } 1154 if (!list_empty(&lgr->list)) { 1155 smc_llc_send_delete_link(to_lnk, del_link_id, 1156 SMC_LLC_REQ, true, 1157 SMC_LLC_DEL_LOST_PATH); 1158 smcr_link_clear(lnk, true); 1159 } 1160 wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */ 1161 } 1162 } 1163 1164 /* must be called under lgr->llc_conf_mutex lock */ 1165 void smcr_link_down_cond(struct smc_link *lnk) 1166 { 1167 if (smc_link_downing(&lnk->state)) 1168 smcr_link_down(lnk); 1169 } 1170 1171 /* will get the lgr->llc_conf_mutex lock */ 1172 void smcr_link_down_cond_sched(struct smc_link *lnk) 1173 { 1174 if (smc_link_downing(&lnk->state)) 1175 schedule_work(&lnk->link_down_wrk); 1176 } 1177 1178 void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport) 1179 { 1180 struct smc_link_group *lgr, *n; 1181 int i; 1182 1183 list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) { 1184 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, 1185 SMC_MAX_PNETID_LEN)) 1186 continue; /* lgr is not affected */ 1187 if (list_empty(&lgr->list)) 1188 continue; 1189 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { 1190 struct smc_link *lnk = &lgr->lnk[i]; 1191 1192 if (smc_link_usable(lnk) && 1193 lnk->smcibdev == smcibdev && lnk->ibport == ibport) 1194 smcr_link_down_cond_sched(lnk); 1195 } 1196 } 1197 } 1198 1199 static void smc_link_down_work(struct work_struct *work) 1200 { 1201 struct smc_link *link = container_of(work, struct smc_link, 1202 link_down_wrk); 1203 struct smc_link_group *lgr = link->lgr; 1204 1205 if (list_empty(&lgr->list)) 1206 return; 1207 wake_up_all(&lgr->llc_msg_waiter); 1208 mutex_lock(&lgr->llc_conf_mutex); 1209 smcr_link_down(link); 1210 mutex_unlock(&lgr->llc_conf_mutex); 1211 } 1212 1213 /* Determine vlan of internal TCP socket. 1214 * @vlan_id: address to store the determined vlan id into 1215 */ 1216 int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini) 1217 { 1218 struct dst_entry *dst = sk_dst_get(clcsock->sk); 1219 struct net_device *ndev; 1220 int i, nest_lvl, rc = 0; 1221 1222 ini->vlan_id = 0; 1223 if (!dst) { 1224 rc = -ENOTCONN; 1225 goto out; 1226 } 1227 if (!dst->dev) { 1228 rc = -ENODEV; 1229 goto out_rel; 1230 } 1231 1232 ndev = dst->dev; 1233 if (is_vlan_dev(ndev)) { 1234 ini->vlan_id = vlan_dev_vlan_id(ndev); 1235 goto out_rel; 1236 } 1237 1238 rtnl_lock(); 1239 nest_lvl = ndev->lower_level; 1240 for (i = 0; i < nest_lvl; i++) { 1241 struct list_head *lower = &ndev->adj_list.lower; 1242 1243 if (list_empty(lower)) 1244 break; 1245 lower = lower->next; 1246 ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower); 1247 if (is_vlan_dev(ndev)) { 1248 ini->vlan_id = vlan_dev_vlan_id(ndev); 1249 break; 1250 } 1251 } 1252 rtnl_unlock(); 1253 1254 out_rel: 1255 dst_release(dst); 1256 out: 1257 return rc; 1258 } 1259 1260 static bool smcr_lgr_match(struct smc_link_group *lgr, 1261 struct smc_clc_msg_local *lcl, 1262 enum smc_lgr_role role, u32 clcqpn) 1263 { 1264 int i; 1265 1266 if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) || 1267 lgr->role != role) 1268 return false; 1269 1270 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { 1271 if (!smc_link_active(&lgr->lnk[i])) 1272 continue; 1273 if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) && 1274 !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) && 1275 !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac))) 1276 return true; 1277 } 1278 return false; 1279 } 1280 1281 static bool smcd_lgr_match(struct smc_link_group *lgr, 1282 struct smcd_dev *smcismdev, u64 peer_gid) 1283 { 1284 return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev; 1285 } 1286 1287 /* create a new SMC connection (and a new link group if necessary) */ 1288 int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) 1289 { 1290 struct smc_connection *conn = &smc->conn; 1291 struct list_head *lgr_list; 1292 struct smc_link_group *lgr; 1293 enum smc_lgr_role role; 1294 spinlock_t *lgr_lock; 1295 int rc = 0; 1296 1297 lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list; 1298 lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock; 1299 ini->cln_first_contact = SMC_FIRST_CONTACT; 1300 role = smc->listen_smc ? SMC_SERV : SMC_CLNT; 1301 if (role == SMC_CLNT && ini->srv_first_contact) 1302 /* create new link group as well */ 1303 goto create; 1304 1305 /* determine if an existing link group can be reused */ 1306 spin_lock_bh(lgr_lock); 1307 list_for_each_entry(lgr, lgr_list, list) { 1308 write_lock_bh(&lgr->conns_lock); 1309 if ((ini->is_smcd ? 1310 smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) : 1311 smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) && 1312 !lgr->sync_err && 1313 lgr->vlan_id == ini->vlan_id && 1314 (role == SMC_CLNT || ini->is_smcd || 1315 lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) { 1316 /* link group found */ 1317 ini->cln_first_contact = SMC_REUSE_CONTACT; 1318 conn->lgr = lgr; 1319 rc = smc_lgr_register_conn(conn, false); 1320 write_unlock_bh(&lgr->conns_lock); 1321 if (!rc && delayed_work_pending(&lgr->free_work)) 1322 cancel_delayed_work(&lgr->free_work); 1323 break; 1324 } 1325 write_unlock_bh(&lgr->conns_lock); 1326 } 1327 spin_unlock_bh(lgr_lock); 1328 if (rc) 1329 return rc; 1330 1331 if (role == SMC_CLNT && !ini->srv_first_contact && 1332 ini->cln_first_contact == SMC_FIRST_CONTACT) { 1333 /* Server reuses a link group, but Client wants to start 1334 * a new one 1335 * send out_of_sync decline, reason synchr. error 1336 */ 1337 return SMC_CLC_DECL_SYNCERR; 1338 } 1339 1340 create: 1341 if (ini->cln_first_contact == SMC_FIRST_CONTACT) { 1342 rc = smc_lgr_create(smc, ini); 1343 if (rc) 1344 goto out; 1345 lgr = conn->lgr; 1346 write_lock_bh(&lgr->conns_lock); 1347 rc = smc_lgr_register_conn(conn, true); 1348 write_unlock_bh(&lgr->conns_lock); 1349 if (rc) 1350 goto out; 1351 } 1352 conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; 1353 conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; 1354 conn->urg_state = SMC_URG_READ; 1355 INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work); 1356 if (ini->is_smcd) { 1357 conn->rx_off = sizeof(struct smcd_cdc_msg); 1358 smcd_cdc_rx_init(conn); /* init tasklet for this conn */ 1359 } else { 1360 conn->rx_off = 0; 1361 } 1362 #ifndef KERNEL_HAS_ATOMIC64 1363 spin_lock_init(&conn->acurs_lock); 1364 #endif 1365 1366 out: 1367 return rc; 1368 } 1369 1370 /* convert the RMB size into the compressed notation - minimum 16K. 1371 * In contrast to plain ilog2, this rounds towards the next power of 2, 1372 * so the socket application gets at least its desired sndbuf / rcvbuf size. 1373 */ 1374 static u8 smc_compress_bufsize(int size) 1375 { 1376 u8 compressed; 1377 1378 if (size <= SMC_BUF_MIN_SIZE) 1379 return 0; 1380 1381 size = (size - 1) >> 14; 1382 compressed = ilog2(size) + 1; 1383 if (compressed >= SMC_RMBE_SIZES) 1384 compressed = SMC_RMBE_SIZES - 1; 1385 return compressed; 1386 } 1387 1388 /* convert the RMB size from compressed notation into integer */ 1389 int smc_uncompress_bufsize(u8 compressed) 1390 { 1391 u32 size; 1392 1393 size = 0x00000001 << (((int)compressed) + 14); 1394 return (int)size; 1395 } 1396 1397 /* try to reuse a sndbuf or rmb description slot for a certain 1398 * buffer size; if not available, return NULL 1399 */ 1400 static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize, 1401 struct mutex *lock, 1402 struct list_head *buf_list) 1403 { 1404 struct smc_buf_desc *buf_slot; 1405 1406 mutex_lock(lock); 1407 list_for_each_entry(buf_slot, buf_list, list) { 1408 if (cmpxchg(&buf_slot->used, 0, 1) == 0) { 1409 mutex_unlock(lock); 1410 return buf_slot; 1411 } 1412 } 1413 mutex_unlock(lock); 1414 return NULL; 1415 } 1416 1417 /* one of the conditions for announcing a receiver's current window size is 1418 * that it "results in a minimum increase in the window size of 10% of the 1419 * receive buffer space" [RFC7609] 1420 */ 1421 static inline int smc_rmb_wnd_update_limit(int rmbe_size) 1422 { 1423 return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); 1424 } 1425 1426 /* map an rmb buf to a link */ 1427 static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb, 1428 struct smc_link *lnk) 1429 { 1430 int rc; 1431 1432 if (buf_desc->is_map_ib[lnk->link_idx]) 1433 return 0; 1434 1435 rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL); 1436 if (rc) 1437 return rc; 1438 sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl, 1439 buf_desc->cpu_addr, buf_desc->len); 1440 1441 /* map sg table to DMA address */ 1442 rc = smc_ib_buf_map_sg(lnk, buf_desc, 1443 is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); 1444 /* SMC protocol depends on mapping to one DMA address only */ 1445 if (rc != 1) { 1446 rc = -EAGAIN; 1447 goto free_table; 1448 } 1449 1450 /* create a new memory region for the RMB */ 1451 if (is_rmb) { 1452 rc = smc_ib_get_memory_region(lnk->roce_pd, 1453 IB_ACCESS_REMOTE_WRITE | 1454 IB_ACCESS_LOCAL_WRITE, 1455 buf_desc, lnk->link_idx); 1456 if (rc) 1457 goto buf_unmap; 1458 smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE); 1459 } 1460 buf_desc->is_map_ib[lnk->link_idx] = true; 1461 return 0; 1462 1463 buf_unmap: 1464 smc_ib_buf_unmap_sg(lnk, buf_desc, 1465 is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); 1466 free_table: 1467 sg_free_table(&buf_desc->sgt[lnk->link_idx]); 1468 return rc; 1469 } 1470 1471 /* register a new rmb on IB device, 1472 * must be called under lgr->llc_conf_mutex lock 1473 */ 1474 int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc) 1475 { 1476 if (list_empty(&link->lgr->list)) 1477 return -ENOLINK; 1478 if (!rmb_desc->is_reg_mr[link->link_idx]) { 1479 /* register memory region for new rmb */ 1480 if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) { 1481 rmb_desc->is_reg_err = true; 1482 return -EFAULT; 1483 } 1484 rmb_desc->is_reg_mr[link->link_idx] = true; 1485 } 1486 return 0; 1487 } 1488 1489 static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock, 1490 struct list_head *lst, bool is_rmb) 1491 { 1492 struct smc_buf_desc *buf_desc, *bf; 1493 int rc = 0; 1494 1495 mutex_lock(lock); 1496 list_for_each_entry_safe(buf_desc, bf, lst, list) { 1497 if (!buf_desc->used) 1498 continue; 1499 rc = smcr_buf_map_link(buf_desc, is_rmb, lnk); 1500 if (rc) 1501 goto out; 1502 } 1503 out: 1504 mutex_unlock(lock); 1505 return rc; 1506 } 1507 1508 /* map all used buffers of lgr for a new link */ 1509 int smcr_buf_map_lgr(struct smc_link *lnk) 1510 { 1511 struct smc_link_group *lgr = lnk->lgr; 1512 int i, rc = 0; 1513 1514 for (i = 0; i < SMC_RMBE_SIZES; i++) { 1515 rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock, 1516 &lgr->rmbs[i], true); 1517 if (rc) 1518 return rc; 1519 rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock, 1520 &lgr->sndbufs[i], false); 1521 if (rc) 1522 return rc; 1523 } 1524 return 0; 1525 } 1526 1527 /* register all used buffers of lgr for a new link, 1528 * must be called under lgr->llc_conf_mutex lock 1529 */ 1530 int smcr_buf_reg_lgr(struct smc_link *lnk) 1531 { 1532 struct smc_link_group *lgr = lnk->lgr; 1533 struct smc_buf_desc *buf_desc, *bf; 1534 int i, rc = 0; 1535 1536 mutex_lock(&lgr->rmbs_lock); 1537 for (i = 0; i < SMC_RMBE_SIZES; i++) { 1538 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) { 1539 if (!buf_desc->used) 1540 continue; 1541 rc = smcr_link_reg_rmb(lnk, buf_desc); 1542 if (rc) 1543 goto out; 1544 } 1545 } 1546 out: 1547 mutex_unlock(&lgr->rmbs_lock); 1548 return rc; 1549 } 1550 1551 static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, 1552 bool is_rmb, int bufsize) 1553 { 1554 struct smc_buf_desc *buf_desc; 1555 1556 /* try to alloc a new buffer */ 1557 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); 1558 if (!buf_desc) 1559 return ERR_PTR(-ENOMEM); 1560 1561 buf_desc->order = get_order(bufsize); 1562 buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN | 1563 __GFP_NOMEMALLOC | __GFP_COMP | 1564 __GFP_NORETRY | __GFP_ZERO, 1565 buf_desc->order); 1566 if (!buf_desc->pages) { 1567 kfree(buf_desc); 1568 return ERR_PTR(-EAGAIN); 1569 } 1570 buf_desc->cpu_addr = (void *)page_address(buf_desc->pages); 1571 buf_desc->len = bufsize; 1572 return buf_desc; 1573 } 1574 1575 /* map buf_desc on all usable links, 1576 * unused buffers stay mapped as long as the link is up 1577 */ 1578 static int smcr_buf_map_usable_links(struct smc_link_group *lgr, 1579 struct smc_buf_desc *buf_desc, bool is_rmb) 1580 { 1581 int i, rc = 0; 1582 1583 /* protect against parallel link reconfiguration */ 1584 mutex_lock(&lgr->llc_conf_mutex); 1585 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { 1586 struct smc_link *lnk = &lgr->lnk[i]; 1587 1588 if (!smc_link_usable(lnk)) 1589 continue; 1590 if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) { 1591 rc = -ENOMEM; 1592 goto out; 1593 } 1594 } 1595 out: 1596 mutex_unlock(&lgr->llc_conf_mutex); 1597 return rc; 1598 } 1599 1600 #define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */ 1601 1602 static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, 1603 bool is_dmb, int bufsize) 1604 { 1605 struct smc_buf_desc *buf_desc; 1606 int rc; 1607 1608 if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES) 1609 return ERR_PTR(-EAGAIN); 1610 1611 /* try to alloc a new DMB */ 1612 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); 1613 if (!buf_desc) 1614 return ERR_PTR(-ENOMEM); 1615 if (is_dmb) { 1616 rc = smc_ism_register_dmb(lgr, bufsize, buf_desc); 1617 if (rc) { 1618 kfree(buf_desc); 1619 return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) : ERR_PTR(rc); 1620 } 1621 buf_desc->pages = virt_to_page(buf_desc->cpu_addr); 1622 /* CDC header stored in buf. So, pretend it was smaller */ 1623 buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg); 1624 } else { 1625 buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL | 1626 __GFP_NOWARN | __GFP_NORETRY | 1627 __GFP_NOMEMALLOC); 1628 if (!buf_desc->cpu_addr) { 1629 kfree(buf_desc); 1630 return ERR_PTR(-EAGAIN); 1631 } 1632 buf_desc->len = bufsize; 1633 } 1634 return buf_desc; 1635 } 1636 1637 static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) 1638 { 1639 struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM); 1640 struct smc_connection *conn = &smc->conn; 1641 struct smc_link_group *lgr = conn->lgr; 1642 struct list_head *buf_list; 1643 int bufsize, bufsize_short; 1644 struct mutex *lock; /* lock buffer list */ 1645 int sk_buf_size; 1646 1647 if (is_rmb) 1648 /* use socket recv buffer size (w/o overhead) as start value */ 1649 sk_buf_size = smc->sk.sk_rcvbuf / 2; 1650 else 1651 /* use socket send buffer size (w/o overhead) as start value */ 1652 sk_buf_size = smc->sk.sk_sndbuf / 2; 1653 1654 for (bufsize_short = smc_compress_bufsize(sk_buf_size); 1655 bufsize_short >= 0; bufsize_short--) { 1656 1657 if (is_rmb) { 1658 lock = &lgr->rmbs_lock; 1659 buf_list = &lgr->rmbs[bufsize_short]; 1660 } else { 1661 lock = &lgr->sndbufs_lock; 1662 buf_list = &lgr->sndbufs[bufsize_short]; 1663 } 1664 bufsize = smc_uncompress_bufsize(bufsize_short); 1665 if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC) 1666 continue; 1667 1668 /* check for reusable slot in the link group */ 1669 buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list); 1670 if (buf_desc) { 1671 memset(buf_desc->cpu_addr, 0, bufsize); 1672 break; /* found reusable slot */ 1673 } 1674 1675 if (is_smcd) 1676 buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize); 1677 else 1678 buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize); 1679 1680 if (PTR_ERR(buf_desc) == -ENOMEM) 1681 break; 1682 if (IS_ERR(buf_desc)) 1683 continue; 1684 1685 buf_desc->used = 1; 1686 mutex_lock(lock); 1687 list_add(&buf_desc->list, buf_list); 1688 mutex_unlock(lock); 1689 break; /* found */ 1690 } 1691 1692 if (IS_ERR(buf_desc)) 1693 return PTR_ERR(buf_desc); 1694 1695 if (!is_smcd) { 1696 if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) { 1697 smcr_buf_unuse(buf_desc, lgr); 1698 return -ENOMEM; 1699 } 1700 } 1701 1702 if (is_rmb) { 1703 conn->rmb_desc = buf_desc; 1704 conn->rmbe_size_short = bufsize_short; 1705 smc->sk.sk_rcvbuf = bufsize * 2; 1706 atomic_set(&conn->bytes_to_rcv, 0); 1707 conn->rmbe_update_limit = 1708 smc_rmb_wnd_update_limit(buf_desc->len); 1709 if (is_smcd) 1710 smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */ 1711 } else { 1712 conn->sndbuf_desc = buf_desc; 1713 smc->sk.sk_sndbuf = bufsize * 2; 1714 atomic_set(&conn->sndbuf_space, bufsize); 1715 } 1716 return 0; 1717 } 1718 1719 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) 1720 { 1721 if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) 1722 return; 1723 smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); 1724 } 1725 1726 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) 1727 { 1728 if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) 1729 return; 1730 smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); 1731 } 1732 1733 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) 1734 { 1735 int i; 1736 1737 if (!conn->lgr || conn->lgr->is_smcd) 1738 return; 1739 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { 1740 if (!smc_link_active(&conn->lgr->lnk[i])) 1741 continue; 1742 smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc, 1743 DMA_FROM_DEVICE); 1744 } 1745 } 1746 1747 void smc_rmb_sync_sg_for_device(struct smc_connection *conn) 1748 { 1749 int i; 1750 1751 if (!conn->lgr || conn->lgr->is_smcd) 1752 return; 1753 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { 1754 if (!smc_link_active(&conn->lgr->lnk[i])) 1755 continue; 1756 smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc, 1757 DMA_FROM_DEVICE); 1758 } 1759 } 1760 1761 /* create the send and receive buffer for an SMC socket; 1762 * receive buffers are called RMBs; 1763 * (even though the SMC protocol allows more than one RMB-element per RMB, 1764 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an 1765 * extra RMB for every connection in a link group 1766 */ 1767 int smc_buf_create(struct smc_sock *smc, bool is_smcd) 1768 { 1769 int rc; 1770 1771 /* create send buffer */ 1772 rc = __smc_buf_create(smc, is_smcd, false); 1773 if (rc) 1774 return rc; 1775 /* create rmb */ 1776 rc = __smc_buf_create(smc, is_smcd, true); 1777 if (rc) { 1778 mutex_lock(&smc->conn.lgr->sndbufs_lock); 1779 list_del(&smc->conn.sndbuf_desc->list); 1780 mutex_unlock(&smc->conn.lgr->sndbufs_lock); 1781 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc); 1782 smc->conn.sndbuf_desc = NULL; 1783 } 1784 return rc; 1785 } 1786 1787 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) 1788 { 1789 int i; 1790 1791 for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) { 1792 if (!test_and_set_bit(i, lgr->rtokens_used_mask)) 1793 return i; 1794 } 1795 return -ENOSPC; 1796 } 1797 1798 static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx, 1799 u32 rkey) 1800 { 1801 int i; 1802 1803 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { 1804 if (test_bit(i, lgr->rtokens_used_mask) && 1805 lgr->rtokens[i][lnk_idx].rkey == rkey) 1806 return i; 1807 } 1808 return -ENOENT; 1809 } 1810 1811 /* set rtoken for a new link to an existing rmb */ 1812 void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new, 1813 __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey) 1814 { 1815 int rtok_idx; 1816 1817 rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known)); 1818 if (rtok_idx == -ENOENT) 1819 return; 1820 lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey); 1821 lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr); 1822 } 1823 1824 /* set rtoken for a new link whose link_id is given */ 1825 void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id, 1826 __be64 nw_vaddr, __be32 nw_rkey) 1827 { 1828 u64 dma_addr = be64_to_cpu(nw_vaddr); 1829 u32 rkey = ntohl(nw_rkey); 1830 bool found = false; 1831 int link_idx; 1832 1833 for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) { 1834 if (lgr->lnk[link_idx].link_id == link_id) { 1835 found = true; 1836 break; 1837 } 1838 } 1839 if (!found) 1840 return; 1841 lgr->rtokens[rtok_idx][link_idx].rkey = rkey; 1842 lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr; 1843 } 1844 1845 /* add a new rtoken from peer */ 1846 int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey) 1847 { 1848 struct smc_link_group *lgr = smc_get_lgr(lnk); 1849 u64 dma_addr = be64_to_cpu(nw_vaddr); 1850 u32 rkey = ntohl(nw_rkey); 1851 int i; 1852 1853 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { 1854 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey && 1855 lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr && 1856 test_bit(i, lgr->rtokens_used_mask)) { 1857 /* already in list */ 1858 return i; 1859 } 1860 } 1861 i = smc_rmb_reserve_rtoken_idx(lgr); 1862 if (i < 0) 1863 return i; 1864 lgr->rtokens[i][lnk->link_idx].rkey = rkey; 1865 lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr; 1866 return i; 1867 } 1868 1869 /* delete an rtoken from all links */ 1870 int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey) 1871 { 1872 struct smc_link_group *lgr = smc_get_lgr(lnk); 1873 u32 rkey = ntohl(nw_rkey); 1874 int i, j; 1875 1876 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { 1877 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey && 1878 test_bit(i, lgr->rtokens_used_mask)) { 1879 for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) { 1880 lgr->rtokens[i][j].rkey = 0; 1881 lgr->rtokens[i][j].dma_addr = 0; 1882 } 1883 clear_bit(i, lgr->rtokens_used_mask); 1884 return 0; 1885 } 1886 } 1887 return -ENOENT; 1888 } 1889 1890 /* save rkey and dma_addr received from peer during clc handshake */ 1891 int smc_rmb_rtoken_handling(struct smc_connection *conn, 1892 struct smc_link *lnk, 1893 struct smc_clc_msg_accept_confirm *clc) 1894 { 1895 conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr, 1896 clc->rmb_rkey); 1897 if (conn->rtoken_idx < 0) 1898 return conn->rtoken_idx; 1899 return 0; 1900 } 1901 1902 static void smc_core_going_away(void) 1903 { 1904 struct smc_ib_device *smcibdev; 1905 struct smcd_dev *smcd; 1906 1907 mutex_lock(&smc_ib_devices.mutex); 1908 list_for_each_entry(smcibdev, &smc_ib_devices.list, list) { 1909 int i; 1910 1911 for (i = 0; i < SMC_MAX_PORTS; i++) 1912 set_bit(i, smcibdev->ports_going_away); 1913 } 1914 mutex_unlock(&smc_ib_devices.mutex); 1915 1916 mutex_lock(&smcd_dev_list.mutex); 1917 list_for_each_entry(smcd, &smcd_dev_list.list, list) { 1918 smcd->going_away = 1; 1919 } 1920 mutex_unlock(&smcd_dev_list.mutex); 1921 } 1922 1923 /* Clean up all SMC link groups */ 1924 static void smc_lgrs_shutdown(void) 1925 { 1926 struct smcd_dev *smcd; 1927 1928 smc_core_going_away(); 1929 1930 smc_smcr_terminate_all(NULL); 1931 1932 mutex_lock(&smcd_dev_list.mutex); 1933 list_for_each_entry(smcd, &smcd_dev_list.list, list) 1934 smc_smcd_terminate_all(smcd); 1935 mutex_unlock(&smcd_dev_list.mutex); 1936 } 1937 1938 static int smc_core_reboot_event(struct notifier_block *this, 1939 unsigned long event, void *ptr) 1940 { 1941 smc_lgrs_shutdown(); 1942 smc_ib_unregister_client(); 1943 return 0; 1944 } 1945 1946 static struct notifier_block smc_reboot_notifier = { 1947 .notifier_call = smc_core_reboot_event, 1948 }; 1949 1950 int __init smc_core_init(void) 1951 { 1952 return register_reboot_notifier(&smc_reboot_notifier); 1953 } 1954 1955 /* Called (from smc_exit) when module is removed */ 1956 void smc_core_exit(void) 1957 { 1958 unregister_reboot_notifier(&smc_reboot_notifier); 1959 smc_lgrs_shutdown(); 1960 } 1961