/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Work Requests exploiting the InfiniBand API
 *
 * Work requests (WRs) are submitted via ib_post_send() or ib_post_recv()
 * to the reliably connected (RC) send or receive queue (SQ/RQ)
 * and become work queue entries (WQEs).
 * While an SQ WR/WQE is pending, we track it until transmission completion.
 * Completions arrive on the corresponding send or receive completion queue
 * (CQ) as completion queue entries (CQEs), also known as work completions
 * (WCs).
 * Since the CQ callback is called from IRQ context, we split the work by
 * using bottom halves implemented as tasklets.
 *
 * SMC uses this to exchange LLC (link layer control)
 * and CDC (connection data control) messages.
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Steffen Maier <maier@linux.vnet.ibm.com>
 */
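
/* Typical use of the send path by the LLC and CDC layers (illustrative
 * sketch only; my_tx_handler stands for the caller's completion handler):
 *
 *	rc = smc_wr_tx_get_free_slot(link, my_tx_handler, &wr_buf, &pend);
 *	if (rc)
 *		return rc;
 *	... assemble the message in *wr_buf ...
 *	rc = smc_wr_tx_send(link, pend);
 *
 * my_tx_handler() is invoked later from the send completion queue tasklet.
 */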

#include <linux/atomic.h>
#include <linux/hashtable.h>
#include <linux/wait.h>
#include <rdma/ib_verbs.h>
#include <asm/div64.h>

#include "smc.h"
#include "smc_wr.h"

#define SMC_WR_MAX_POLL_CQE 10	/* max. # of compl. queue elements in 1 poll */

#define SMC_WR_RX_HASH_BITS 4
static DEFINE_HASHTABLE(smc_wr_rx_hash, SMC_WR_RX_HASH_BITS);
static DEFINE_SPINLOCK(smc_wr_rx_hash_lock);

struct smc_wr_tx_pend {	/* control data for a pending send request */
	u64			wr_id;		/* work request id sent */
	smc_wr_tx_handler	handler;
	enum ib_wc_status	wc_status;	/* CQE status */
	struct smc_link		*link;
	u32			idx;
	struct smc_wr_tx_pend_priv priv;
};

/******************************** send queue *********************************/

/*------------------------------- completion --------------------------------*/

static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id)
{
	u32 i;

	for (i = 0; i < link->wr_tx_cnt; i++) {
		if (link->wr_tx_pends[i].wr_id == wr_id)
			return i;
	}
	return link->wr_tx_cnt;
}

static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
{
	struct smc_wr_tx_pend pnd_snd;
	struct smc_link *link;
	u32 pnd_snd_idx;
	int i;

	link = wc->qp->qp_context;

	if (wc->opcode == IB_WC_REG_MR) {
		if (wc->status)
			link->wr_reg_state = FAILED;
		else
			link->wr_reg_state = CONFIRMED;
		wake_up(&link->wr_reg_wait);
		return;
	}

	pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
	if (pnd_snd_idx == link->wr_tx_cnt)
		return;
	link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
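	/* work on a local copy of the pend entry, so that the slot and its
	 * send buffer can be cleared (and reused) before the completion
	 * handler runs
	 */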
	memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd));
	/* clear the full struct smc_wr_tx_pend including .priv */
	memset(&link->wr_tx_pends[pnd_snd_idx], 0,
	       sizeof(link->wr_tx_pends[pnd_snd_idx]));
	memset(&link->wr_tx_bufs[pnd_snd_idx], 0,
	       sizeof(link->wr_tx_bufs[pnd_snd_idx]));
	if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
		return;
	if (wc->status) {
		struct smc_link_group *lgr;

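		/* a send error leaves the queue pair unusable: drop all other
		 * pending sends and terminate the link group's connections
		 */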
		for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
			/* clear full struct smc_wr_tx_pend including .priv */
			memset(&link->wr_tx_pends[i], 0,
			       sizeof(link->wr_tx_pends[i]));
			memset(&link->wr_tx_bufs[i], 0,
			       sizeof(link->wr_tx_bufs[i]));
			clear_bit(i, link->wr_tx_mask);
		}
		/* terminate connections of this link group abnormally */
		lgr = container_of(link, struct smc_link_group,
				   lnk[SMC_SINGLE_LINK]);
		smc_lgr_terminate(lgr);
	}
	if (pnd_snd.handler)
		pnd_snd.handler(&pnd_snd.priv, link, wc->status);
	wake_up(&link->wr_tx_wait);
}

static void smc_wr_tx_tasklet_fn(unsigned long data)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)data;
	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
	int i = 0, rc;
	int polled = 0;

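	/* poll the send CQ until it is empty; after the first pass re-arm
	 * completion notification, then make one more pass so that
	 * completions racing with the re-arming are not missed
	 */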
again:
	polled++;
	do {
		rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
		if (polled == 1) {
			ib_req_notify_cq(dev->roce_cq_send,
					 IB_CQ_NEXT_COMP |
					 IB_CQ_REPORT_MISSED_EVENTS);
		}
		if (!rc)
			break;
		for (i = 0; i < rc; i++)
			smc_wr_tx_process_cqe(&wc[i]);
	} while (rc > 0);
	if (polled == 1)
		goto again;
}

void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;

	tasklet_schedule(&dev->send_tasklet);
}

/*---------------------------- request submission ---------------------------*/

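/* scan the send slot bitmap for a free slot and atomically claim it;
 * returns -EBUSY when all send slots are currently in use
 */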
static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
{
	*idx = link->wr_tx_cnt;
	for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) {
		if (!test_and_set_bit(*idx, link->wr_tx_mask))
			return 0;
	}
	*idx = link->wr_tx_cnt;
	return -EBUSY;
}

/**
 * smc_wr_tx_get_free_slot() - returns buffer for message assembly,
 *			and sets info for pending transmit tracking
 * @link:		Pointer to smc_link used to later send the message.
 * @handler:		Send completion handler function pointer.
 * @wr_buf:		Out value returns pointer to message buffer.
 * @wr_pend_priv:	Out value returns pointer serving as handler context.
 *
 * Return: 0 on success, or -errno on error.
 */
int smc_wr_tx_get_free_slot(struct smc_link *link,
			    smc_wr_tx_handler handler,
			    struct smc_wr_buf **wr_buf,
			    struct smc_wr_tx_pend_priv **wr_pend_priv)
{
	struct smc_wr_tx_pend *wr_pend;
	struct ib_send_wr *wr_ib;
	u64 wr_id;
	u32 idx;
	int rc;

	*wr_buf = NULL;
	*wr_pend_priv = NULL;
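	/* in softirq context we must not sleep, so make a single attempt;
	 * otherwise wait (interruptibly, with a timeout) for a free slot
	 */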
	if (in_softirq()) {
		rc = smc_wr_tx_get_free_slot_index(link, &idx);
		if (rc)
			return rc;
	} else {
		rc = wait_event_interruptible_timeout(
			link->wr_tx_wait,
			(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
			SMC_WR_TX_WAIT_FREE_SLOT_TIME);
		if (!rc) {
			/* timeout - terminate connections */
			struct smc_link_group *lgr;

			lgr = container_of(link, struct smc_link_group,
					   lnk[SMC_SINGLE_LINK]);
			smc_lgr_terminate(lgr);
			return -EPIPE;
		}
		if (rc == -ERESTARTSYS)
			return -EINTR;
		if (idx == link->wr_tx_cnt)
			return -EPIPE;
	}
	wr_id = smc_wr_tx_get_next_wr_id(link);
	wr_pend = &link->wr_tx_pends[idx];
	wr_pend->wr_id = wr_id;
	wr_pend->handler = handler;
	wr_pend->link = link;
	wr_pend->idx = idx;
	wr_ib = &link->wr_tx_ibs[idx];
	wr_ib->wr_id = wr_id;
	*wr_buf = &link->wr_tx_bufs[idx];
	*wr_pend_priv = &wr_pend->priv;
	return 0;
}

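/* Return a send slot obtained via smc_wr_tx_get_free_slot() without posting
 * it. Returns 1 if the slot was in use and has been freed, 0 otherwise.
 */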
int smc_wr_tx_put_slot(struct smc_link *link,
		       struct smc_wr_tx_pend_priv *wr_pend_priv)
{
	struct smc_wr_tx_pend *pend;

	pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv);
	if (pend->idx < link->wr_tx_cnt) {
		/* clear the full struct smc_wr_tx_pend including .priv */
		memset(&link->wr_tx_pends[pend->idx], 0,
		       sizeof(link->wr_tx_pends[pend->idx]));
		memset(&link->wr_tx_bufs[pend->idx], 0,
		       sizeof(link->wr_tx_bufs[pend->idx]));
		test_and_clear_bit(pend->idx, link->wr_tx_mask);
		return 1;
	}

	return 0;
}

/* Send prepared WR slot via ib_post_send.
 * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
 */
int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
{
	struct ib_send_wr *failed_wr = NULL;
	struct smc_wr_tx_pend *pend;
	int rc;

	ib_req_notify_cq(link->smcibdev->roce_cq_send,
			 IB_CQ_SOLICITED_MASK | IB_CQ_REPORT_MISSED_EVENTS);
	pend = container_of(priv, struct smc_wr_tx_pend, priv);
	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
			  &failed_wr);
	if (rc)
		smc_wr_tx_put_slot(link, priv);
	return rc;
}

/* Register a memory region and wait for result. */
int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
{
	struct ib_send_wr *failed_wr = NULL;
	int rc;

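	/* post an IB_WR_REG_MR work request; its completion is signalled by
	 * the send CQ handler via wr_reg_state and wr_reg_wait
	 * (see smc_wr_tx_process_cqe())
	 */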
	ib_req_notify_cq(link->smcibdev->roce_cq_send,
			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	link->wr_reg_state = POSTED;
	link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;
	link->wr_reg.mr = mr;
	link->wr_reg.key = mr->rkey;
	failed_wr = &link->wr_reg.wr;
	rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr);
	WARN_ON(failed_wr != &link->wr_reg.wr);
	if (rc)
		return rc;

	rc = wait_event_interruptible_timeout(link->wr_reg_wait,
					      (link->wr_reg_state != POSTED),
					      SMC_WR_REG_MR_WAIT_TIME);
	if (!rc) {
		/* timeout - terminate connections */
		struct smc_link_group *lgr;

		lgr = container_of(link, struct smc_link_group,
				   lnk[SMC_SINGLE_LINK]);
		smc_lgr_terminate(lgr);
		return -EPIPE;
	}
	if (rc == -ERESTARTSYS)
		return -EINTR;
	switch (link->wr_reg_state) {
	case CONFIRMED:
		rc = 0;
		break;
	case FAILED:
		rc = -EIO;
		break;
	case POSTED:
		rc = -EPIPE;
		break;
	}
	return rc;
}

void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
			     smc_wr_tx_filter filter,
			     smc_wr_tx_dismisser dismisser,
			     unsigned long data)
{
	struct smc_wr_tx_pend_priv *tx_pend;
	struct smc_wr_rx_hdr *wr_rx;
	int i;

	for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
		/* the type byte sits at the start of the pending tx buffer */
		wr_rx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
		if (wr_rx->type != wr_rx_hdr_type)
			continue;
		tx_pend = &link->wr_tx_pends[i].priv;
		if (filter(tx_pend, data))
			dismisser(tx_pend);
	}
}

bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
			   smc_wr_tx_filter filter, unsigned long data)
{
	struct smc_wr_tx_pend_priv *tx_pend;
	struct smc_wr_rx_hdr *wr_rx;
	int i;

	for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
		wr_rx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
		if (wr_rx->type != wr_rx_hdr_type)
			continue;
		tx_pend = &link->wr_tx_pends[i].priv;
		if (filter(tx_pend, data))
			return true;
	}
	return false;
}

/****************************** receive queue ********************************/

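/* Register a receive completion handler for a given message type.
 * Only one handler per type may be registered; -EEXIST is returned otherwise.
 */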
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
{
	struct smc_wr_rx_handler *h_iter;
	int rc = 0;

	spin_lock(&smc_wr_rx_hash_lock);
	hash_for_each_possible(smc_wr_rx_hash, h_iter, list, handler->type) {
		if (h_iter->type == handler->type) {
			rc = -EEXIST;
			goto out_unlock;
		}
	}
	hash_add(smc_wr_rx_hash, &handler->list, handler->type);
out_unlock:
	spin_unlock(&smc_wr_rx_hash_lock);
	return rc;
}

/* Demultiplex a received work request based on the message type to its handler.
 * Relies on smc_wr_rx_hash having been completely filled before any IB WRs,
 * and not being modified any more afterwards so we don't need to lock it.
 */
static inline void smc_wr_rx_demultiplex(struct ib_wc *wc)
{
	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
	struct smc_wr_rx_handler *handler;
	struct smc_wr_rx_hdr *wr_rx;
	u64 temp_wr_id;
	u32 index;

	if (wc->byte_len < sizeof(*wr_rx))
		return; /* short message */
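	/* receive wr_ids are taken from a running counter when the buffers
	 * are posted (smc_wr_rx_post()), so wr_id modulo wr_rx_cnt yields
	 * the index of the receive buffer that was filled
	 */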
	temp_wr_id = wc->wr_id;
	index = do_div(temp_wr_id, link->wr_rx_cnt);
	wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[index];
	hash_for_each_possible(smc_wr_rx_hash, handler, list, wr_rx->type) {
		if (handler->type == wr_rx->type)
			handler->handler(wc, wr_rx);
	}
}

static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
{
	struct smc_link *link;
	int i;

	for (i = 0; i < num; i++) {
		link = wc[i].qp->qp_context;
		if (wc[i].status == IB_WC_SUCCESS) {
			smc_wr_rx_demultiplex(&wc[i]);
			smc_wr_rx_post(link); /* refill WR RX */
		} else {
			struct smc_link_group *lgr;

			/* handle status errors */
			switch (wc[i].status) {
			case IB_WC_RETRY_EXC_ERR:
			case IB_WC_RNR_RETRY_EXC_ERR:
			case IB_WC_WR_FLUSH_ERR:
				/* terminate connections of this link group
				 * abnormally
				 */
				lgr = container_of(link, struct smc_link_group,
						   lnk[SMC_SINGLE_LINK]);
				smc_lgr_terminate(lgr);
				break;
			default:
				smc_wr_rx_post(link); /* refill WR RX */
				break;
			}
		}
	}
}

static void smc_wr_rx_tasklet_fn(unsigned long data)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)data;
	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
	int polled = 0;
	int rc;

again:
	polled++;
	do {
		memset(&wc, 0, sizeof(wc));
		rc = ib_poll_cq(dev->roce_cq_recv, SMC_WR_MAX_POLL_CQE, wc);
		if (polled == 1) {
			ib_req_notify_cq(dev->roce_cq_recv,
					 IB_CQ_SOLICITED_MASK
					 | IB_CQ_REPORT_MISSED_EVENTS);
		}
		if (!rc)
			break;
		smc_wr_rx_process_cqes(&wc[0], rc);
	} while (rc > 0);
	if (polled == 1)
		goto again;
}

void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;

	tasklet_schedule(&dev->recv_tasklet);
}

int smc_wr_rx_post_init(struct smc_link *link)
{
	u32 i;
	int rc = 0;

	for (i = 0; i < link->wr_rx_cnt; i++)
		rc = smc_wr_rx_post(link);
	return rc;
}

/***************************** init, exit, misc ******************************/

void smc_wr_remember_qp_attr(struct smc_link *lnk)
{
	struct ib_qp_attr *attr = &lnk->qp_attr;
	struct ib_qp_init_attr init_attr;

	memset(attr, 0, sizeof(*attr));
	memset(&init_attr, 0, sizeof(init_attr));
	ib_query_qp(lnk->roce_qp, attr,
		    IB_QP_STATE |
		    IB_QP_CUR_STATE |
		    IB_QP_PKEY_INDEX |
		    IB_QP_PORT |
		    IB_QP_QKEY |
		    IB_QP_AV |
		    IB_QP_PATH_MTU |
		    IB_QP_TIMEOUT |
		    IB_QP_RETRY_CNT |
		    IB_QP_RNR_RETRY |
		    IB_QP_RQ_PSN |
		    IB_QP_ALT_PATH |
		    IB_QP_MIN_RNR_TIMER |
		    IB_QP_SQ_PSN |
		    IB_QP_PATH_MIG_STATE |
		    IB_QP_CAP |
		    IB_QP_DEST_QPN,
		    &init_attr);

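	/* limit the number of send/receive slots actually used to what
	 * the queue pair supports
	 */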
	lnk->wr_tx_cnt = min_t(size_t, SMC_WR_BUF_CNT,
			       lnk->qp_attr.cap.max_send_wr);
	lnk->wr_rx_cnt = min_t(size_t, SMC_WR_BUF_CNT * 3,
			       lnk->qp_attr.cap.max_recv_wr);
}

static void smc_wr_init_sge(struct smc_link *lnk)
{
	u32 i;

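	/* build one work request template with a single scatter-gather
	 * entry per send and receive buffer; send WRs are signaled so that
	 * every message generates a CQE
	 */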
	for (i = 0; i < lnk->wr_tx_cnt; i++) {
		lnk->wr_tx_sges[i].addr =
			lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
		lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
		lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_ibs[i].next = NULL;
		lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i];
		lnk->wr_tx_ibs[i].num_sge = 1;
		lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
		lnk->wr_tx_ibs[i].send_flags =
			IB_SEND_SIGNALED | IB_SEND_SOLICITED;
	}
	for (i = 0; i < lnk->wr_rx_cnt; i++) {
		lnk->wr_rx_sges[i].addr =
			lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE;
		lnk->wr_rx_sges[i].length = SMC_WR_BUF_SIZE;
		lnk->wr_rx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
		lnk->wr_rx_ibs[i].next = NULL;
		lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i];
		lnk->wr_rx_ibs[i].num_sge = 1;
	}
	lnk->wr_reg.wr.next = NULL;
	lnk->wr_reg.wr.num_sge = 0;
	lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED;
	lnk->wr_reg.wr.opcode = IB_WR_REG_MR;
	lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
}

void smc_wr_free_link(struct smc_link *lnk)
{
	struct ib_device *ibdev;

	memset(lnk->wr_tx_mask, 0,
	       BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));

	if (!lnk->smcibdev)
		return;
	ibdev = lnk->smcibdev->ibdev;

	if (lnk->wr_rx_dma_addr) {
		ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
				    SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
				    DMA_FROM_DEVICE);
		lnk->wr_rx_dma_addr = 0;
	}
	if (lnk->wr_tx_dma_addr) {
		ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr,
				    SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
				    DMA_TO_DEVICE);
		lnk->wr_tx_dma_addr = 0;
	}
}

void smc_wr_free_link_mem(struct smc_link *lnk)
{
	kfree(lnk->wr_tx_pends);
	lnk->wr_tx_pends = NULL;
	kfree(lnk->wr_tx_mask);
	lnk->wr_tx_mask = NULL;
	kfree(lnk->wr_tx_sges);
	lnk->wr_tx_sges = NULL;
	kfree(lnk->wr_rx_sges);
	lnk->wr_rx_sges = NULL;
	kfree(lnk->wr_rx_ibs);
	lnk->wr_rx_ibs = NULL;
	kfree(lnk->wr_tx_ibs);
	lnk->wr_tx_ibs = NULL;
	kfree(lnk->wr_tx_bufs);
	lnk->wr_tx_bufs = NULL;
	kfree(lnk->wr_rx_bufs);
	lnk->wr_rx_bufs = NULL;
}

int smc_wr_alloc_link_mem(struct smc_link *link)
{
	/* allocate link related memory */
	link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL);
	if (!link->wr_tx_bufs)
		goto no_mem;
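	/* the receive side uses three times as many buffers as the send
	 * side (cf. smc_wr_remember_qp_attr())
	 */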
	link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, SMC_WR_BUF_SIZE,
				   GFP_KERNEL);
	if (!link->wr_rx_bufs)
		goto no_mem_wr_tx_bufs;
	link->wr_tx_ibs = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_ibs[0]),
				  GFP_KERNEL);
	if (!link->wr_tx_ibs)
		goto no_mem_wr_rx_bufs;
	link->wr_rx_ibs = kcalloc(SMC_WR_BUF_CNT * 3,
				  sizeof(link->wr_rx_ibs[0]),
				  GFP_KERNEL);
	if (!link->wr_rx_ibs)
		goto no_mem_wr_tx_ibs;
	link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]),
				   GFP_KERNEL);
	if (!link->wr_tx_sges)
		goto no_mem_wr_rx_ibs;
	link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
				   sizeof(link->wr_rx_sges[0]),
				   GFP_KERNEL);
	if (!link->wr_rx_sges)
		goto no_mem_wr_tx_sges;
	link->wr_tx_mask = kzalloc(
		BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*link->wr_tx_mask),
		GFP_KERNEL);
	if (!link->wr_tx_mask)
		goto no_mem_wr_rx_sges;
	link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT,
				    sizeof(link->wr_tx_pends[0]),
				    GFP_KERNEL);
	if (!link->wr_tx_pends)
		goto no_mem_wr_tx_mask;
	return 0;

no_mem_wr_tx_mask:
	kfree(link->wr_tx_mask);
no_mem_wr_rx_sges:
	kfree(link->wr_rx_sges);
no_mem_wr_tx_sges:
	kfree(link->wr_tx_sges);
no_mem_wr_rx_ibs:
	kfree(link->wr_rx_ibs);
no_mem_wr_tx_ibs:
	kfree(link->wr_tx_ibs);
no_mem_wr_rx_bufs:
	kfree(link->wr_rx_bufs);
no_mem_wr_tx_bufs:
	kfree(link->wr_tx_bufs);
no_mem:
	return -ENOMEM;
}

void smc_wr_remove_dev(struct smc_ib_device *smcibdev)
{
	tasklet_kill(&smcibdev->recv_tasklet);
	tasklet_kill(&smcibdev->send_tasklet);
}

void smc_wr_add_dev(struct smc_ib_device *smcibdev)
{
	tasklet_init(&smcibdev->recv_tasklet, smc_wr_rx_tasklet_fn,
		     (unsigned long)smcibdev);
	tasklet_init(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn,
		     (unsigned long)smcibdev);
}

int smc_wr_create_link(struct smc_link *lnk)
{
	struct ib_device *ibdev = lnk->smcibdev->ibdev;
	int rc = 0;

	smc_wr_tx_set_wr_id(&lnk->wr_tx_id, 0);
	lnk->wr_rx_id = 0;
	lnk->wr_rx_dma_addr = ib_dma_map_single(
		ibdev, lnk->wr_rx_bufs, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
		DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(ibdev, lnk->wr_rx_dma_addr)) {
		lnk->wr_rx_dma_addr = 0;
		rc = -EIO;
		goto out;
	}
	lnk->wr_tx_dma_addr = ib_dma_map_single(
		ibdev, lnk->wr_tx_bufs, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
		DMA_TO_DEVICE);
	if (ib_dma_mapping_error(ibdev, lnk->wr_tx_dma_addr)) {
		rc = -EIO;
		goto dma_unmap;
	}
	smc_wr_init_sge(lnk);
	memset(lnk->wr_tx_mask, 0,
	       BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
	init_waitqueue_head(&lnk->wr_tx_wait);
	init_waitqueue_head(&lnk->wr_reg_wait);
	return rc;

dma_unmap:
	ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
			    SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
			    DMA_FROM_DEVICE);
	lnk->wr_rx_dma_addr = 0;
out:
	return rc;
}
678