/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Work Requests exploiting the InfiniBand verbs API
 *
 * Work requests (WR) of type ib_post_send or ib_post_recv are submitted
 * to the reliably connected (RC) send queue (SQ) or receive queue (RQ),
 * respectively, and become work queue entries (WQEs).
 * While an SQ WR/WQE is pending, we track it until transmission completion.
 * Completion queue entries (CQEs) [aka work completions (WCs)] arrive on
 * the corresponding send or receive completion queue (CQ).
 * Since the CQ callback is called from IRQ context, we split the work by
 * using bottom halves implemented as tasklets.
 *
 * SMC uses this to exchange LLC (link layer control)
 * and CDC (connection data control) messages.
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Steffen Maier <maier@linux.vnet.ibm.com>
 */

#include <linux/atomic.h>
#include <linux/hashtable.h>
#include <linux/wait.h>
#include <rdma/ib_verbs.h>
#include <asm/div64.h>

#include "smc.h"
#include "smc_wr.h"

#define SMC_WR_MAX_POLL_CQE 10	/* max. # of compl. queue elements in 1 poll */

#define SMC_WR_RX_HASH_BITS 4
static DEFINE_HASHTABLE(smc_wr_rx_hash, SMC_WR_RX_HASH_BITS);
static DEFINE_SPINLOCK(smc_wr_rx_hash_lock);

struct smc_wr_tx_pend {	/* control data for a pending send request */
	u64			wr_id;		/* work request id sent */
	smc_wr_tx_handler	handler;
	enum ib_wc_status	wc_status;	/* CQE status */
	struct smc_link		*link;
	u32			idx;
	struct smc_wr_tx_pend_priv priv;
};

/******************************** send queue *********************************/

/*------------------------------- completion --------------------------------*/

static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id)
{
	u32 i;

	for (i = 0; i < link->wr_tx_cnt; i++) {
		if (link->wr_tx_pends[i].wr_id == wr_id)
			return i;
	}
	return link->wr_tx_cnt;
}

static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
{
	struct smc_wr_tx_pend pnd_snd;
	struct smc_link *link;
	u32 pnd_snd_idx;
	int i;

	link = wc->qp->qp_context;

	if (wc->opcode == IB_WC_REG_MR) {
		if (wc->status)
			link->wr_reg_state = FAILED;
		else
			link->wr_reg_state = CONFIRMED;
		wake_up(&link->wr_reg_wait);
		return;
	}

	pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
	if (pnd_snd_idx == link->wr_tx_cnt)
		return;
	link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
	memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd));
	/* clear the full struct smc_wr_tx_pend including .priv */
	memset(&link->wr_tx_pends[pnd_snd_idx], 0,
	       sizeof(link->wr_tx_pends[pnd_snd_idx]));
	memset(&link->wr_tx_bufs[pnd_snd_idx], 0,
	       sizeof(link->wr_tx_bufs[pnd_snd_idx]));
	if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
		return;
	if (wc->status) {
		struct smc_link_group *lgr;

		for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
			/* clear full struct smc_wr_tx_pend including .priv */
			memset(&link->wr_tx_pends[i], 0,
			       sizeof(link->wr_tx_pends[i]));
			memset(&link->wr_tx_bufs[i], 0,
			       sizeof(link->wr_tx_bufs[i]));
			clear_bit(i, link->wr_tx_mask);
		}
		/* terminate connections of this link group abnormally */
		lgr = container_of(link, struct smc_link_group,
				   lnk[SMC_SINGLE_LINK]);
		smc_lgr_terminate(lgr);
	}
	if (pnd_snd.handler)
		pnd_snd.handler(&pnd_snd.priv, link, wc->status);
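	/* the slot is free again; wake up a sender waiting for a free slot */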
	wake_up(&link->wr_tx_wait);
}

static void smc_wr_tx_tasklet_fn(unsigned long data)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)data;
	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
	int i = 0, rc;
	int polled = 0;

again:
	polled++;
	do {
		rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
		if (polled == 1) {
			ib_req_notify_cq(dev->roce_cq_send,
					 IB_CQ_NEXT_COMP |
					 IB_CQ_REPORT_MISSED_EVENTS);
		}
		if (!rc)
			break;
		for (i = 0; i < rc; i++)
			smc_wr_tx_process_cqe(&wc[i]);
	} while (rc > 0);
	if (polled == 1)
		goto again;
}

void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;

	tasklet_schedule(&dev->send_tasklet);
}

/*---------------------------- request submission ---------------------------*/

static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
{
	*idx = link->wr_tx_cnt;
	for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) {
		if (!test_and_set_bit(*idx, link->wr_tx_mask))
			return 0;
	}
	*idx = link->wr_tx_cnt;
	return -EBUSY;
}

/**
 * smc_wr_tx_get_free_slot() - returns buffer for message assembly,
 *			and sets info for pending transmit tracking
 * @link:		Pointer to smc_link used to later send the message.
 * @handler:		Send completion handler function pointer.
 * @wr_buf:		Out value returns pointer to message buffer.
 * @wr_pend_priv:	Out value returns pointer serving as handler context.
 *
 * Return: 0 on success, or -errno on error.
 */
int smc_wr_tx_get_free_slot(struct smc_link *link,
			    smc_wr_tx_handler handler,
			    struct smc_wr_buf **wr_buf,
			    struct smc_wr_tx_pend_priv **wr_pend_priv)
{
	struct smc_wr_tx_pend *wr_pend;
	struct ib_send_wr *wr_ib;
	u64 wr_id;
	u32 idx;
	int rc;

	*wr_buf = NULL;
	*wr_pend_priv = NULL;
	if (in_softirq()) {
		rc = smc_wr_tx_get_free_slot_index(link, &idx);
		if (rc)
			return rc;
	} else {
		rc = wait_event_interruptible_timeout(
			link->wr_tx_wait,
			(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
			SMC_WR_TX_WAIT_FREE_SLOT_TIME);
		if (!rc) {
			/* timeout - terminate connections */
			struct smc_link_group *lgr;

			lgr = container_of(link, struct smc_link_group,
					   lnk[SMC_SINGLE_LINK]);
			smc_lgr_terminate(lgr);
			return -EPIPE;
		}
		if (rc == -ERESTARTSYS)
			return -EINTR;
		if (idx == link->wr_tx_cnt)
			return -EPIPE;
	}
	wr_id = smc_wr_tx_get_next_wr_id(link);
	wr_pend = &link->wr_tx_pends[idx];
	wr_pend->wr_id = wr_id;
	wr_pend->handler = handler;
	wr_pend->link = link;
	wr_pend->idx = idx;
	wr_ib = &link->wr_tx_ibs[idx];
	wr_ib->wr_id = wr_id;
	*wr_buf = &link->wr_tx_bufs[idx];
	*wr_pend_priv = &wr_pend->priv;
	return 0;
}

int smc_wr_tx_put_slot(struct smc_link *link,
		       struct smc_wr_tx_pend_priv *wr_pend_priv)
{
	struct smc_wr_tx_pend *pend;

	pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv);
	if (pend->idx < link->wr_tx_cnt) {
		/* clear the full struct smc_wr_tx_pend including .priv */
		memset(&link->wr_tx_pends[pend->idx], 0,
		       sizeof(link->wr_tx_pends[pend->idx]));
		memset(&link->wr_tx_bufs[pend->idx], 0,
		       sizeof(link->wr_tx_bufs[pend->idx]));
		test_and_clear_bit(pend->idx, link->wr_tx_mask);
		return 1;
	}

	return 0;
}

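/* Typical send path (illustrative sketch only; error handling trimmed and
 * the completion handler name is hypothetical, not taken from a real caller):
 *
 *	struct smc_wr_tx_pend_priv *pend;
 *	struct smc_wr_buf *wr_buf;
 *	int rc;
 *
 *	rc = smc_wr_tx_get_free_slot(link, my_tx_handler, &wr_buf, &pend);
 *	if (rc)
 *		return rc;
 *	... assemble the LLC or CDC message in *wr_buf ...
 *	rc = smc_wr_tx_send(link, pend);
 *
 * If the caller decides not to send after all, smc_wr_tx_put_slot() releases
 * the slot again.
 */
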
/* Send prepared WR slot via ib_post_send.
 * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
 */
int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
{
	struct ib_send_wr *failed_wr = NULL;
	struct smc_wr_tx_pend *pend;
	int rc;

	ib_req_notify_cq(link->smcibdev->roce_cq_send,
			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	pend = container_of(priv, struct smc_wr_tx_pend, priv);
	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
			  &failed_wr);
	if (rc)
		smc_wr_tx_put_slot(link, priv);
	return rc;
}

/* Register a memory region and wait for result. */
int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
{
	struct ib_send_wr *failed_wr = NULL;
	int rc;

	ib_req_notify_cq(link->smcibdev->roce_cq_send,
			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	link->wr_reg_state = POSTED;
	link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;
	link->wr_reg.mr = mr;
	link->wr_reg.key = mr->rkey;
	failed_wr = &link->wr_reg.wr;
	rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr);
	WARN_ON(failed_wr != &link->wr_reg.wr);
	if (rc)
		return rc;

	rc = wait_event_interruptible_timeout(link->wr_reg_wait,
					      (link->wr_reg_state != POSTED),
					      SMC_WR_REG_MR_WAIT_TIME);
	if (!rc) {
		/* timeout - terminate connections */
		struct smc_link_group *lgr;

		lgr = container_of(link, struct smc_link_group,
				   lnk[SMC_SINGLE_LINK]);
		smc_lgr_terminate(lgr);
		return -EPIPE;
	}
	if (rc == -ERESTARTSYS)
		return -EINTR;
	switch (link->wr_reg_state) {
	case CONFIRMED:
		rc = 0;
		break;
	case FAILED:
		rc = -EIO;
		break;
	case POSTED:
		rc = -EPIPE;
		break;
	}
	return rc;
}

void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
			     smc_wr_tx_filter filter,
			     smc_wr_tx_dismisser dismisser,
			     unsigned long data)
{
	struct smc_wr_tx_pend_priv *tx_pend;
	struct smc_wr_rx_hdr *wr_rx;
	int i;

	for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
		wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i];
		if (wr_rx->type != wr_rx_hdr_type)
			continue;
		tx_pend = &link->wr_tx_pends[i].priv;
		if (filter(tx_pend, data))
			dismisser(tx_pend);
	}
}

bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
			   smc_wr_tx_filter filter, unsigned long data)
{
	struct smc_wr_tx_pend_priv *tx_pend;
	struct smc_wr_rx_hdr *wr_rx;
	int i;

	for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
		wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i];
		if (wr_rx->type != wr_rx_hdr_type)
			continue;
		tx_pend = &link->wr_tx_pends[i].priv;
		if (filter(tx_pend, data))
			return true;
	}
	return false;
}

/****************************** receive queue ********************************/

int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
{
	struct smc_wr_rx_handler *h_iter;
	int rc = 0;

	spin_lock(&smc_wr_rx_hash_lock);
	hash_for_each_possible(smc_wr_rx_hash, h_iter, list, handler->type) {
		if (h_iter->type == handler->type) {
			rc = -EEXIST;
			goto out_unlock;
		}
	}
	hash_add(smc_wr_rx_hash, &handler->list, handler->type);
out_unlock:
	spin_unlock(&smc_wr_rx_hash_lock);
	return rc;
}

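/* Registration sketch (illustrative only; MY_MSG_TYPE and my_rx_handler are
 * hypothetical, the exact callback prototype is defined in smc_wr.h):
 *
 *	static void my_rx_handler(struct ib_wc *wc, void *buf)
 *	{
 *		struct smc_wr_rx_hdr *hdr = buf;
 *		... handle the received message ...
 *	}
 *
 *	static struct smc_wr_rx_handler my_handler = {
 *		.handler	= my_rx_handler,
 *		.type		= MY_MSG_TYPE,
 *	};
 *
 *	rc = smc_wr_rx_register_handler(&my_handler);
 *
 * All handlers must be registered before the first receive WRs are posted;
 * see the comment on smc_wr_rx_demultiplex() below.
 */
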
/* Demultiplex a received work request based on the message type to its handler.
 * Relies on smc_wr_rx_hash having been completely filled before any IB WRs,
 * and not being modified anymore afterwards, so we do not need to lock it.
 */
static inline void smc_wr_rx_demultiplex(struct ib_wc *wc)
{
	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
	struct smc_wr_rx_handler *handler;
	struct smc_wr_rx_hdr *wr_rx;
	u64 temp_wr_id;
	u32 index;

	if (wc->byte_len < sizeof(*wr_rx))
		return; /* short message */
	temp_wr_id = wc->wr_id;
	index = do_div(temp_wr_id, link->wr_rx_cnt);
	wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[index];
	hash_for_each_possible(smc_wr_rx_hash, handler, list, wr_rx->type) {
		if (handler->type == wr_rx->type)
			handler->handler(wc, wr_rx);
	}
}

static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
{
	struct smc_link *link;
	int i;

	for (i = 0; i < num; i++) {
		link = wc[i].qp->qp_context;
		if (wc[i].status == IB_WC_SUCCESS) {
			smc_wr_rx_demultiplex(&wc[i]);
			smc_wr_rx_post(link); /* refill WR RX */
		} else {
			struct smc_link_group *lgr;

			/* handle status errors */
			switch (wc[i].status) {
			case IB_WC_RETRY_EXC_ERR:
			case IB_WC_RNR_RETRY_EXC_ERR:
			case IB_WC_WR_FLUSH_ERR:
				/* terminate connections of this link group
				 * abnormally
				 */
				lgr = container_of(link, struct smc_link_group,
						   lnk[SMC_SINGLE_LINK]);
				smc_lgr_terminate(lgr);
				break;
			default:
				smc_wr_rx_post(link); /* refill WR RX */
				break;
			}
		}
	}
}

static void smc_wr_rx_tasklet_fn(unsigned long data)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)data;
	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
	int polled = 0;
	int rc;

again:
	polled++;
	do {
		memset(&wc, 0, sizeof(wc));
		rc = ib_poll_cq(dev->roce_cq_recv, SMC_WR_MAX_POLL_CQE, wc);
		if (polled == 1) {
			ib_req_notify_cq(dev->roce_cq_recv,
					 IB_CQ_SOLICITED_MASK
					 | IB_CQ_REPORT_MISSED_EVENTS);
		}
		if (!rc)
			break;
		smc_wr_rx_process_cqes(&wc[0], rc);
	} while (rc > 0);
	if (polled == 1)
		goto again;
}

void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;

	tasklet_schedule(&dev->recv_tasklet);
}

int smc_wr_rx_post_init(struct smc_link *link)
{
	u32 i;
	int rc = 0;

	for (i = 0; i < link->wr_rx_cnt; i++)
		rc = smc_wr_rx_post(link);
	return rc;
}

/***************************** init, exit, misc ******************************/

void smc_wr_remember_qp_attr(struct smc_link *lnk)
{
	struct ib_qp_attr *attr = &lnk->qp_attr;
	struct ib_qp_init_attr init_attr;

	memset(attr, 0, sizeof(*attr));
	memset(&init_attr, 0, sizeof(init_attr));
	ib_query_qp(lnk->roce_qp, attr,
		    IB_QP_STATE |
		    IB_QP_CUR_STATE |
		    IB_QP_PKEY_INDEX |
		    IB_QP_PORT |
		    IB_QP_QKEY |
		    IB_QP_AV |
		    IB_QP_PATH_MTU |
		    IB_QP_TIMEOUT |
		    IB_QP_RETRY_CNT |
		    IB_QP_RNR_RETRY |
		    IB_QP_RQ_PSN |
		    IB_QP_ALT_PATH |
		    IB_QP_MIN_RNR_TIMER |
		    IB_QP_SQ_PSN |
		    IB_QP_PATH_MIG_STATE |
		    IB_QP_CAP |
		    IB_QP_DEST_QPN,
		    &init_attr);

	lnk->wr_tx_cnt = min_t(size_t, SMC_WR_BUF_CNT,
			       lnk->qp_attr.cap.max_send_wr);
	lnk->wr_rx_cnt = min_t(size_t, SMC_WR_BUF_CNT * 3,
			       lnk->qp_attr.cap.max_recv_wr);
}

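/* Set up the static parts of the send/receive work requests:
 * every send slot i and receive slot i gets one scatter-gather element
 * pointing at offset i * SMC_WR_BUF_SIZE within the DMA-mapped wr_tx_bufs
 * resp. wr_rx_bufs array, and the IB_WR_REG_MR work request used by
 * smc_wr_reg_send() is prepared with its access flags.
 */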
static void smc_wr_init_sge(struct smc_link *lnk)
{
	u32 i;

	for (i = 0; i < lnk->wr_tx_cnt; i++) {
		lnk->wr_tx_sges[i].addr =
			lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
		lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
		lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_ibs[i].next = NULL;
		lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i];
		lnk->wr_tx_ibs[i].num_sge = 1;
		lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
		lnk->wr_tx_ibs[i].send_flags =
			IB_SEND_SIGNALED | IB_SEND_SOLICITED;
	}
	for (i = 0; i < lnk->wr_rx_cnt; i++) {
		lnk->wr_rx_sges[i].addr =
			lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE;
		lnk->wr_rx_sges[i].length = SMC_WR_BUF_SIZE;
		lnk->wr_rx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
		lnk->wr_rx_ibs[i].next = NULL;
		lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i];
		lnk->wr_rx_ibs[i].num_sge = 1;
	}
	lnk->wr_reg.wr.next = NULL;
	lnk->wr_reg.wr.num_sge = 0;
	lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED;
	lnk->wr_reg.wr.opcode = IB_WR_REG_MR;
	lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
}

void smc_wr_free_link(struct smc_link *lnk)
{
	struct ib_device *ibdev;

	memset(lnk->wr_tx_mask, 0,
	       BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));

	if (!lnk->smcibdev)
		return;
	ibdev = lnk->smcibdev->ibdev;

	if (lnk->wr_rx_dma_addr) {
		ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
				    SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
				    DMA_FROM_DEVICE);
		lnk->wr_rx_dma_addr = 0;
	}
	if (lnk->wr_tx_dma_addr) {
		ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr,
				    SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
				    DMA_TO_DEVICE);
		lnk->wr_tx_dma_addr = 0;
	}
}

void smc_wr_free_link_mem(struct smc_link *lnk)
{
	kfree(lnk->wr_tx_pends);
	lnk->wr_tx_pends = NULL;
	kfree(lnk->wr_tx_mask);
	lnk->wr_tx_mask = NULL;
	kfree(lnk->wr_tx_sges);
	lnk->wr_tx_sges = NULL;
	kfree(lnk->wr_rx_sges);
	lnk->wr_rx_sges = NULL;
	kfree(lnk->wr_rx_ibs);
	lnk->wr_rx_ibs = NULL;
	kfree(lnk->wr_tx_ibs);
	lnk->wr_tx_ibs = NULL;
	kfree(lnk->wr_tx_bufs);
	lnk->wr_tx_bufs = NULL;
	kfree(lnk->wr_rx_bufs);
	lnk->wr_rx_bufs = NULL;
}

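/* Allocate the per-link send/receive rings: SMC_WR_BUF_CNT send slots and
 * three times as many receive slots, plus the matching work request, SGE,
 * tx-mask and pending-send arrays. On failure everything allocated so far
 * is freed again and -ENOMEM is returned.
 */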
int smc_wr_alloc_link_mem(struct smc_link *link)
{
	/* allocate link related memory */
	link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL);
	if (!link->wr_tx_bufs)
		goto no_mem;
	link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, SMC_WR_BUF_SIZE,
				   GFP_KERNEL);
	if (!link->wr_rx_bufs)
		goto no_mem_wr_tx_bufs;
	link->wr_tx_ibs = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_ibs[0]),
				  GFP_KERNEL);
	if (!link->wr_tx_ibs)
		goto no_mem_wr_rx_bufs;
	link->wr_rx_ibs = kcalloc(SMC_WR_BUF_CNT * 3,
				  sizeof(link->wr_rx_ibs[0]),
				  GFP_KERNEL);
	if (!link->wr_rx_ibs)
		goto no_mem_wr_tx_ibs;
	link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]),
				   GFP_KERNEL);
	if (!link->wr_tx_sges)
		goto no_mem_wr_rx_ibs;
	link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
				   sizeof(link->wr_rx_sges[0]),
				   GFP_KERNEL);
	if (!link->wr_rx_sges)
		goto no_mem_wr_tx_sges;
	link->wr_tx_mask = kzalloc(
		BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*link->wr_tx_mask),
		GFP_KERNEL);
	if (!link->wr_tx_mask)
		goto no_mem_wr_rx_sges;
	link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT,
				    sizeof(link->wr_tx_pends[0]),
				    GFP_KERNEL);
	if (!link->wr_tx_pends)
		goto no_mem_wr_tx_mask;
	return 0;

no_mem_wr_tx_mask:
	kfree(link->wr_tx_mask);
no_mem_wr_rx_sges:
	kfree(link->wr_rx_sges);
no_mem_wr_tx_sges:
	kfree(link->wr_tx_sges);
no_mem_wr_rx_ibs:
	kfree(link->wr_rx_ibs);
no_mem_wr_tx_ibs:
	kfree(link->wr_tx_ibs);
no_mem_wr_rx_bufs:
	kfree(link->wr_rx_bufs);
no_mem_wr_tx_bufs:
	kfree(link->wr_tx_bufs);
no_mem:
	return -ENOMEM;
}

void smc_wr_remove_dev(struct smc_ib_device *smcibdev)
{
	tasklet_kill(&smcibdev->recv_tasklet);
	tasklet_kill(&smcibdev->send_tasklet);
}

void smc_wr_add_dev(struct smc_ib_device *smcibdev)
{
	tasklet_init(&smcibdev->recv_tasklet, smc_wr_rx_tasklet_fn,
		     (unsigned long)smcibdev);
	tasklet_init(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn,
		     (unsigned long)smcibdev);
}

int smc_wr_create_link(struct smc_link *lnk)
{
	struct ib_device *ibdev = lnk->smcibdev->ibdev;
	int rc = 0;

	smc_wr_tx_set_wr_id(&lnk->wr_tx_id, 0);
	lnk->wr_rx_id = 0;
	lnk->wr_rx_dma_addr = ib_dma_map_single(
		ibdev, lnk->wr_rx_bufs, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
		DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(ibdev, lnk->wr_rx_dma_addr)) {
		lnk->wr_rx_dma_addr = 0;
		rc = -EIO;
		goto out;
	}
	lnk->wr_tx_dma_addr = ib_dma_map_single(
		ibdev, lnk->wr_tx_bufs, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
		DMA_TO_DEVICE);
	if (ib_dma_mapping_error(ibdev, lnk->wr_tx_dma_addr)) {
		rc = -EIO;
		goto dma_unmap;
	}
	smc_wr_init_sge(lnk);
	memset(lnk->wr_tx_mask, 0,
	       BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
	init_waitqueue_head(&lnk->wr_tx_wait);
	init_waitqueue_head(&lnk->wr_reg_wait);
	return rc;

dma_unmap:
	ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
			    SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
			    DMA_FROM_DEVICE);
	lnk->wr_rx_dma_addr = 0;
out:
	return rc;
}
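
/* Rough lifetime sketch (the call sites are assumed to live elsewhere in the
 * SMC code; the exact ordering relative to QP setup is not shown here):
 *
 *	per RoCE device:	smc_wr_add_dev() ... smc_wr_remove_dev()
 *	per link setup:		smc_wr_alloc_link_mem(), smc_wr_create_link(),
 *				smc_wr_rx_post_init()
 *	per link teardown:	smc_wr_free_link(), smc_wr_free_link_mem()
 */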