// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Basic Transport Functions exploiting Infiniband API
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_LGR_NUM_INCR	256
#define SMC_LGR_FREE_DELAY_SERV	(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT	(SMC_LGR_FREE_DELAY_SERV + 10)

static u32 smc_lgr_num;		/* unique link group number */

static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
			 bool is_rmb);

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	mod_delayed_work(system_wq, &lgr->free_work,
			 lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
						 SMC_LGR_FREE_DELAY_SERV);
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @conn	connection to register
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}
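
/* Editorial sketch (not from the original source): lookups walk the same
 * ordering as the insert above, along the lines of the smc_lgr_find_conn()
 * helper in smc_core.h:
 *
 *	node = lgr->conns_all.rb_node;
 *	while (node) {
 *		cur = rb_entry(node, struct smc_connection, alert_node);
 *		if (cur->alert_token_local > token)
 *			node = node->rb_left;
 *		else if (cur->alert_token_local < token)
 *			node = node->rb_right;
 *		else
 *			return cur;	(alert token found)
 *	}
 *	return NULL;
 *
 * The tree is keyed solely by alert_token_local, so tokens must be unique
 * within a link group; smc_lgr_register_conn() below ensures that.
 */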

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static void smc_lgr_register_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);

	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);
	conn->lgr->conns_num++;
}

/* Unregister connection and reset the alert token of the given connection.
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	lgr->conns_num--;
	conn->alert_token_local = 0;
	conn->lgr = NULL;
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection and trigger lgr freeing if applicable
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;
	int reduced = 0;

	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local) {
		reduced = 1;
		__smc_lgr_unregister_conn(conn);
	}
	write_unlock_bh(&lgr->conns_lock);
	if (!reduced || lgr->conns_num)
		return;
	smc_lgr_schedule_free_work(lgr);
}

static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	bool conns;

	spin_lock_bh(&smc_lgr_list.lock);
	if (list_empty(&lgr->list))
		goto free;
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(&smc_lgr_list.lock);
		return;
	}
	list_del_init(&lgr->list); /* remove from smc_lgr_list */
free:
	spin_unlock_bh(&smc_lgr_list.lock);
	if (!delayed_work_pending(&lgr->free_work))
		smc_lgr_free(lgr);
}
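
/* Editorial note on the lifetime scheme above (a reading of the code, not
 * part of the original source): smc_lgr_schedule_free_work() re-arms
 * free_work via mod_delayed_work() whenever the last connection of a link
 * group goes away. When the work finally runs, it frees the link group
 * only if the connection rbtree is still empty and no newer re-arm is
 * pending; otherwise the group stays on smc_lgr_list for reuse by a later
 * smc_conn_create().
 */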

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc,
			  struct smc_ib_device *smcibdev, u8 ibport,
			  char *peer_systemid, unsigned short vlan_id)
{
	struct smc_link_group *lgr;
	struct smc_link *lnk;
	u8 rndvec[3];
	int rc = 0;
	int i;

	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
		rc = -ENOMEM;
		goto out;
	}
	lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	lgr->sync_err = false;
	memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
	lgr->vlan_id = vlan_id;
	rwlock_init(&lgr->sndbufs_lock);
	rwlock_init(&lgr->rmbs_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
	smc_lgr_num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_num, SMC_LGR_ID_SIZE);
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	lgr->conns_all = RB_ROOT;

	lnk = &lgr->lnk[SMC_SINGLE_LINK];
	/* initialize link */
	lnk->state = SMC_LNK_ACTIVATING;
	lnk->link_id = SMC_SINGLE_LINK;
	lnk->smcibdev = smcibdev;
	lnk->ibport = ibport;
	lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
	if (!smcibdev->initialized)
		smc_ib_setup_per_ibdev(smcibdev);
	get_random_bytes(rndvec, sizeof(rndvec));
	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16);
	rc = smc_wr_alloc_link_mem(lnk);
	if (rc)
		goto free_lgr;
	rc = smc_ib_create_protection_domain(lnk);
	if (rc)
		goto free_link_mem;
	rc = smc_ib_create_queue_pair(lnk);
	if (rc)
		goto dealloc_pd;
	rc = smc_wr_create_link(lnk);
	if (rc)
		goto destroy_qp;
	init_completion(&lnk->llc_confirm);
	init_completion(&lnk->llc_confirm_resp);
	init_completion(&lnk->llc_add);
	init_completion(&lnk->llc_add_resp);

	smc->conn.lgr = lgr;
	rwlock_init(&lgr->conns_lock);
	spin_lock_bh(&smc_lgr_list.lock);
	list_add(&lgr->list, &smc_lgr_list.list);
	spin_unlock_bh(&smc_lgr_list.lock);
	return 0;

destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
free_lgr:
	kfree(lgr);
out:
	return rc;
}

static void smc_buf_unuse(struct smc_connection *conn)
{
	if (conn->sndbuf_desc) {
		conn->sndbuf_desc->used = 0;
		conn->sndbuf_size = 0;
	}
	if (conn->rmb_desc) {
		if (!conn->rmb_desc->regerr) {
			conn->rmb_desc->reused = 1;
			conn->rmb_desc->used = 0;
			conn->rmbe_size = 0;
		} else {
			/* buf registration failed, reuse not possible */
			struct smc_link_group *lgr = conn->lgr;
			struct smc_link *lnk;

			write_lock_bh(&lgr->rmbs_lock);
			list_del(&conn->rmb_desc->list);
			write_unlock_bh(&lgr->rmbs_lock);

			lnk = &lgr->lnk[SMC_SINGLE_LINK];
			smc_buf_free(conn->rmb_desc, lnk, true);
		}
	}
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
	if (!conn->lgr)
		return;
	smc_cdc_tx_dismiss_slots(conn);
	smc_lgr_unregister_conn(conn);
	smc_buf_unuse(conn);
}

static void smc_link_clear(struct smc_link *lnk)
{
	lnk->peer_qpn = 0;
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
}
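
/* Editorial note (not from the original source): smc_link_clear() releases
 * the link's IB resources in the reverse order of their creation in
 * smc_lgr_create(): work request state first, then queue pair, protection
 * domain, and finally the link memory. Partially built links are unwound
 * by the error path of smc_lgr_create() itself, so this helper only ever
 * sees fully created links.
 */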

static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
			 bool is_rmb)
{
	if (is_rmb) {
		if (buf_desc->mr_rx[SMC_SINGLE_LINK])
			smc_ib_put_memory_region(
					buf_desc->mr_rx[SMC_SINGLE_LINK]);
		smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
				    DMA_FROM_DEVICE);
	} else {
		smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
				    DMA_TO_DEVICE);
	}
	sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
	if (buf_desc->cpu_addr)
		free_pages((unsigned long)buf_desc->cpu_addr, buf_desc->order);
	kfree(buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
	struct smc_buf_desc *buf_desc, *bf_desc;
	struct list_head *buf_list;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		if (is_rmb)
			buf_list = &lgr->rmbs[i];
		else
			buf_list = &lgr->sndbufs[i];
		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
					 list) {
			list_del(&buf_desc->list);
			smc_buf_free(buf_desc, lnk, is_rmb);
		}
	}
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	/* free send buffers */
	__smc_lgr_free_bufs(lgr, false);
	/* free rmbs */
	__smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
void smc_lgr_free(struct smc_link_group *lgr)
{
	smc_lgr_free_bufs(lgr);
	smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
	kfree(lgr);
}

void smc_lgr_forget(struct smc_link_group *lgr)
{
	spin_lock_bh(&smc_lgr_list.lock);
	/* do not use this link group for new connections */
	if (!list_empty(&lgr->list))
		list_del_init(&lgr->list);
	spin_unlock_bh(&smc_lgr_list.lock);
}

/* terminate link group abnormally */
void smc_lgr_terminate(struct smc_link_group *lgr)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	smc_lgr_forget(lgr);

	write_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put in close work */
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
		__smc_lgr_unregister_conn(conn);
		write_unlock_bh(&lgr->conns_lock);
		if (!schedule_work(&conn->close_work))
			sock_put(&smc->sk);
		write_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	write_unlock_bh(&lgr->conns_lock);
	wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
	smc_lgr_schedule_free_work(lgr);
}
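
/* Editorial note (not from the original source): in the loop above,
 * conns_lock is released around schedule_work()/sock_put() and each pass
 * restarts at rb_first(), because __smc_lgr_unregister_conn() erased the
 * current node and the tree may have changed while the lock was dropped.
 */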

/* Determine vlan of internal TCP socket.
 * @vlan_id: address to store the determined vlan id into
 */
static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	int rc = 0;

	*vlan_id = 0;
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	if (is_vlan_dev(dst->dev))
		*vlan_id = vlan_dev_vlan_id(dst->dev);

out_rel:
	dst_release(dst);
out:
	return rc;
}

/* determine the link gid matching the vlan id of the link group */
static int smc_link_determine_gid(struct smc_link_group *lgr)
{
	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
	struct ib_gid_attr gattr;
	union ib_gid gid;
	int i;

	if (!lgr->vlan_id) {
		lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1];
		return 0;
	}

	for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len;
	     i++) {
		if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid,
				 &gattr))
			continue;
		if (gattr.ndev) {
			if (is_vlan_dev(gattr.ndev) &&
			    vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
				lnk->gid = gid;
				dev_put(gattr.ndev);
				return 0;
			}
			dev_put(gattr.ndev);
		}
	}
	return -ENODEV;
}
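
/* Worked example (editorial, assuming a typical RoCE setup): if the CLC
 * handshake ran over eth0.100 (VLAN id 100), smc_vlan_by_tcpsk() records
 * vlan_id 100 for the link group, and smc_link_determine_gid() walks the
 * port's GID table until it finds an entry whose net_device is a VLAN
 * device with id 100, e.g. the GID instantiated for eth0.100. Without a
 * VLAN, the port's default GID cached in smcibdev is used directly.
 */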

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc,
		    struct smc_ib_device *smcibdev, u8 ibport,
		    struct smc_clc_msg_local *lcl, int srv_first_contact)
{
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr;
	unsigned short vlan_id;
	enum smc_lgr_role role;
	int local_contact = SMC_FIRST_CONTACT;
	int rc = 0;

	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id);
	if (rc)
		return rc;

	if ((role == SMC_CLNT) && srv_first_contact)
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry(lgr, &smc_lgr_list.list, list) {
		write_lock_bh(&lgr->conns_lock);
		if (!memcmp(lgr->peer_systemid, lcl->id_for_peer,
			    SMC_SYSTEMID_LEN) &&
		    !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
			    SMC_GID_SIZE) &&
		    !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
			    sizeof(lcl->mac)) &&
		    !lgr->sync_err &&
		    (lgr->role == role) &&
		    (lgr->vlan_id == vlan_id) &&
		    ((role == SMC_CLNT) ||
		     (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) {
			/* link group found */
			local_contact = SMC_REUSE_CONTACT;
			conn->lgr = lgr;
			smc_lgr_register_conn(conn); /* add smc conn to lgr */
			write_unlock_bh(&lgr->conns_lock);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	if (role == SMC_CLNT && !srv_first_contact &&
	    (local_contact == SMC_FIRST_CONTACT)) {
		/* server reuses a link group, but client wants to start
		 * a new one; send out_of_sync decline, reason:
		 * synchronization error
		 */
		return -ENOLINK;
	}

create:
	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_lgr_create(smc, smcibdev, ibport,
				    lcl->id_for_peer, vlan_id);
		if (rc)
			goto out;
		smc_lgr_register_conn(conn); /* add smc conn to lgr */
		rc = smc_link_determine_gid(conn->lgr);
	}
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif

out:
	return rc ? rc : local_contact;
}

/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static inline
struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr,
				      int compressed_bufsize,
				      rwlock_t *lock,
				      struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	read_lock_bh(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			read_unlock_bh(lock);
			return buf_slot;
		}
	}
	read_unlock_bh(lock);
	return NULL;
}

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
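
/* Worked example (editorial): for a 16 KiB RMB, rmbe_size / 10 is 1638
 * bytes; the limit is the smaller of that value and SOCK_MIN_SNDBUF / 2,
 * so a window update is announced only once at least that much new receive
 * space has opened up, avoiding a flood of tiny CDC window updates.
 */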

static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
					       bool is_rmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	struct smc_link *lnk;
	int rc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	buf_desc->cpu_addr =
		(void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN |
					 __GFP_NOMEMALLOC |
					 __GFP_NORETRY | __GFP_ZERO,
					 get_order(bufsize));
	if (!buf_desc->cpu_addr) {
		kfree(buf_desc);
		return ERR_PTR(-EAGAIN);
	}
	buf_desc->order = get_order(bufsize);

	/* build the sg table from the pages */
	lnk = &lgr->lnk[SMC_SINGLE_LINK];
	rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
			    GFP_KERNEL);
	if (rc) {
		smc_buf_free(buf_desc, lnk, is_rmb);
		return ERR_PTR(rc);
	}
	sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
		   buf_desc->cpu_addr, bufsize);

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != 1) {
		smc_buf_free(buf_desc, lnk, is_rmb);
		return ERR_PTR(-EAGAIN);
	}

	/* create a new memory region for the RMB */
	if (is_rmb) {
		rc = smc_ib_get_memory_region(lnk->roce_pd,
					      IB_ACCESS_REMOTE_WRITE |
					      IB_ACCESS_LOCAL_WRITE,
					      buf_desc);
		if (rc) {
			smc_buf_free(buf_desc, lnk, is_rmb);
			return ERR_PTR(rc);
		}
	}

	return buf_desc;
}

static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
{
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct list_head *buf_list;
	int bufsize, bufsize_short;
	int sk_buf_size;
	rwlock_t *lock;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_sndbuf / 2;

	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
	     bufsize_short >= 0; bufsize_short--) {

		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_short];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_short];
		}
		bufsize = smc_uncompress_bufsize(bufsize_short);
		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
			continue;

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list);
		if (buf_desc) {
			memset(buf_desc->cpu_addr, 0, bufsize);
			break; /* found reusable slot */
		}

		buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize);
		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc))
			continue;

		buf_desc->used = 1;
		write_lock_bh(lock);
		list_add(&buf_desc->list, buf_list);
		write_unlock_bh(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return -ENOMEM;

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size = bufsize;
		conn->rmbe_size_short = bufsize_short;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
	} else {
		conn->sndbuf_desc = buf_desc;
		conn->sndbuf_size = bufsize;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}

void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
			       conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
				  conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
			       conn->rmb_desc, DMA_FROM_DEVICE);
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
				  conn->rmb_desc, DMA_FROM_DEVICE);
}
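
/* Usage note (editorial): these wrappers are expected to bracket CPU
 * access to the DMA-mapped buffers, e.g. a sync_sg_for_cpu before the
 * receive path copies data out of the RMB, and a sync_sg_for_device after
 * the send path has filled the sndbuf, so that the CPU and device views
 * of the memory stay coherent on platforms without cache-coherent DMA.
 */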

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group)
 */
int smc_buf_create(struct smc_sock *smc)
{
	int rc;

	/* create send buffer */
	rc = __smc_buf_create(smc, false);
	if (rc)
		return rc;
	/* create rmb */
	rc = __smc_buf_create(smc, true);
	if (rc)
		smc_buf_free(smc->conn.sndbuf_desc,
			     &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false);
	return rc;
}

static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
		    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* already in list */
			return i;
		}
	}
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
	lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
	return i;
}

/* delete an rtoken */
int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
{
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
			lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;

			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
					  clc->rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}
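
/* Usage note (editorial): the rtoken saved here pairs the peer's RMB rkey
 * with its DMA address; the transmit path later uses the entry at
 * conn->rtoken_idx to address the peer RMB in its RDMA WRITE work requests
 * (see smc_tx.c in this tree).
 */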