xref: /openbmc/linux/drivers/infiniband/hw/mthca/mthca_qp.c (revision 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2)
1 /*
2  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  *
32  * $Id: mthca_qp.c 1355 2004-12-17 15:23:43Z roland $
33  */
34 
35 #include <linux/init.h>
36 
37 #include <ib_verbs.h>
38 #include <ib_cache.h>
39 #include <ib_pack.h>
40 
41 #include "mthca_dev.h"
42 #include "mthca_cmd.h"
43 #include "mthca_memfree.h"
44 
45 enum {
46 	MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
47 	MTHCA_ACK_REQ_FREQ       = 10,
48 	MTHCA_FLIGHT_LIMIT       = 9,
49 	MTHCA_UD_HEADER_SIZE     = 72 /* largest UD header possible */
50 };
51 
52 enum {
53 	MTHCA_QP_STATE_RST  = 0,
54 	MTHCA_QP_STATE_INIT = 1,
55 	MTHCA_QP_STATE_RTR  = 2,
56 	MTHCA_QP_STATE_RTS  = 3,
57 	MTHCA_QP_STATE_SQE  = 4,
58 	MTHCA_QP_STATE_SQD  = 5,
59 	MTHCA_QP_STATE_ERR  = 6,
60 	MTHCA_QP_STATE_DRAINING = 7
61 };
62 
63 enum {
64 	MTHCA_QP_ST_RC 	= 0x0,
65 	MTHCA_QP_ST_UC 	= 0x1,
66 	MTHCA_QP_ST_RD 	= 0x2,
67 	MTHCA_QP_ST_UD 	= 0x3,
68 	MTHCA_QP_ST_MLX = 0x7
69 };
70 
71 enum {
72 	MTHCA_QP_PM_MIGRATED = 0x3,
73 	MTHCA_QP_PM_ARMED    = 0x0,
74 	MTHCA_QP_PM_REARM    = 0x1
75 };
76 
77 enum {
78 	/* qp_context flags */
79 	MTHCA_QP_BIT_DE  = 1 <<  8,
80 	/* params1 */
81 	MTHCA_QP_BIT_SRE = 1 << 15,
82 	MTHCA_QP_BIT_SWE = 1 << 14,
83 	MTHCA_QP_BIT_SAE = 1 << 13,
84 	MTHCA_QP_BIT_SIC = 1 <<  4,
85 	MTHCA_QP_BIT_SSC = 1 <<  3,
86 	/* params2 */
87 	MTHCA_QP_BIT_RRE = 1 << 15,
88 	MTHCA_QP_BIT_RWE = 1 << 14,
89 	MTHCA_QP_BIT_RAE = 1 << 13,
90 	MTHCA_QP_BIT_RIC = 1 <<  4,
91 	MTHCA_QP_BIT_RSC = 1 <<  3
92 };
93 
94 struct mthca_qp_path {
95 	u32 port_pkey;
96 	u8  rnr_retry;
97 	u8  g_mylmc;
98 	u16 rlid;
99 	u8  ackto;
100 	u8  mgid_index;
101 	u8  static_rate;
102 	u8  hop_limit;
103 	u32 sl_tclass_flowlabel;
104 	u8  rgid[16];
105 } __attribute__((packed));
106 
107 struct mthca_qp_context {
108 	u32 flags;
109 	u32 tavor_sched_queue;	/* Reserved on Arbel */
110 	u8  mtu_msgmax;
111 	u8  rq_size_stride;	/* Reserved on Tavor */
112 	u8  sq_size_stride;	/* Reserved on Tavor */
113 	u8  rlkey_arbel_sched_queue;	/* Reserved on Tavor */
114 	u32 usr_page;
115 	u32 local_qpn;
116 	u32 remote_qpn;
117 	u32 reserved1[2];
118 	struct mthca_qp_path pri_path;
119 	struct mthca_qp_path alt_path;
120 	u32 rdd;
121 	u32 pd;
122 	u32 wqe_base;
123 	u32 wqe_lkey;
124 	u32 params1;
125 	u32 reserved2;
126 	u32 next_send_psn;
127 	u32 cqn_snd;
128 	u32 snd_wqe_base_l;	/* Next send WQE on Tavor */
129 	u32 snd_db_index;	/* (debugging only entries) */
130 	u32 last_acked_psn;
131 	u32 ssn;
132 	u32 params2;
133 	u32 rnr_nextrecvpsn;
134 	u32 ra_buff_indx;
135 	u32 cqn_rcv;
136 	u32 rcv_wqe_base_l;	/* Next recv WQE on Tavor */
137 	u32 rcv_db_index;	/* (debugging only entries) */
138 	u32 qkey;
139 	u32 srqn;
140 	u32 rmsn;
141 	u16 rq_wqe_counter;	/* reserved on Tavor */
142 	u16 sq_wqe_counter;	/* reserved on Tavor */
143 	u32 reserved3[18];
144 } __attribute__((packed));
145 
146 struct mthca_qp_param {
147 	u32 opt_param_mask;
148 	u32 reserved1;
149 	struct mthca_qp_context context;
150 	u32 reserved2[62];
151 } __attribute__((packed));
152 
153 enum {
154 	MTHCA_QP_OPTPAR_ALT_ADDR_PATH     = 1 << 0,
155 	MTHCA_QP_OPTPAR_RRE               = 1 << 1,
156 	MTHCA_QP_OPTPAR_RAE               = 1 << 2,
157 	MTHCA_QP_OPTPAR_RWE               = 1 << 3,
158 	MTHCA_QP_OPTPAR_PKEY_INDEX        = 1 << 4,
159 	MTHCA_QP_OPTPAR_Q_KEY             = 1 << 5,
160 	MTHCA_QP_OPTPAR_RNR_TIMEOUT       = 1 << 6,
161 	MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7,
162 	MTHCA_QP_OPTPAR_SRA_MAX           = 1 << 8,
163 	MTHCA_QP_OPTPAR_RRA_MAX           = 1 << 9,
164 	MTHCA_QP_OPTPAR_PM_STATE          = 1 << 10,
165 	MTHCA_QP_OPTPAR_PORT_NUM          = 1 << 11,
166 	MTHCA_QP_OPTPAR_RETRY_COUNT       = 1 << 12,
167 	MTHCA_QP_OPTPAR_ALT_RNR_RETRY     = 1 << 13,
168 	MTHCA_QP_OPTPAR_ACK_TIMEOUT       = 1 << 14,
169 	MTHCA_QP_OPTPAR_RNR_RETRY         = 1 << 15,
170 	MTHCA_QP_OPTPAR_SCHED_QUEUE       = 1 << 16
171 };
172 
173 enum {
174 	MTHCA_OPCODE_NOP            = 0x00,
175 	MTHCA_OPCODE_RDMA_WRITE     = 0x08,
176 	MTHCA_OPCODE_RDMA_WRITE_IMM = 0x09,
177 	MTHCA_OPCODE_SEND           = 0x0a,
178 	MTHCA_OPCODE_SEND_IMM       = 0x0b,
179 	MTHCA_OPCODE_RDMA_READ      = 0x10,
180 	MTHCA_OPCODE_ATOMIC_CS      = 0x11,
181 	MTHCA_OPCODE_ATOMIC_FA      = 0x12,
182 	MTHCA_OPCODE_BIND_MW        = 0x18,
183 	MTHCA_OPCODE_INVALID        = 0xff
184 };
185 
186 enum {
187 	MTHCA_NEXT_DBD       = 1 << 7,
188 	MTHCA_NEXT_FENCE     = 1 << 6,
189 	MTHCA_NEXT_CQ_UPDATE = 1 << 3,
190 	MTHCA_NEXT_EVENT_GEN = 1 << 2,
191 	MTHCA_NEXT_SOLICIT   = 1 << 1,
192 
193 	MTHCA_MLX_VL15       = 1 << 17,
194 	MTHCA_MLX_SLR        = 1 << 16
195 };
196 
197 struct mthca_next_seg {
198 	u32 nda_op;		/* [31:6] next WQE [4:0] next opcode */
199 	u32 ee_nds;		/* [31:8] next EE  [7] DBD [6] F [5:0] next WQE size */
200 	u32 flags;		/* [3] CQ [2] Event [1] Solicit */
201 	u32 imm;		/* immediate data */
202 };
203 
204 struct mthca_tavor_ud_seg {
205 	u32 reserved1;
206 	u32 lkey;
207 	u64 av_addr;
208 	u32 reserved2[4];
209 	u32 dqpn;
210 	u32 qkey;
211 	u32 reserved3[2];
212 };
213 
214 struct mthca_arbel_ud_seg {
215 	u32 av[8];
216 	u32 dqpn;
217 	u32 qkey;
218 	u32 reserved[2];
219 };
220 
221 struct mthca_bind_seg {
222 	u32 flags;		/* [31] Atomic [30] rem write [29] rem read */
223 	u32 reserved;
224 	u32 new_rkey;
225 	u32 lkey;
226 	u64 addr;
227 	u64 length;
228 };
229 
230 struct mthca_raddr_seg {
231 	u64 raddr;
232 	u32 rkey;
233 	u32 reserved;
234 };
235 
236 struct mthca_atomic_seg {
237 	u64 swap_add;
238 	u64 compare;
239 };
240 
241 struct mthca_data_seg {
242 	u32 byte_count;
243 	u32 lkey;
244 	u64 addr;
245 };
246 
247 struct mthca_mlx_seg {
248 	u32 nda_op;
249 	u32 nds;
250 	u32 flags;		/* [17] VL15 [16] SLR [14:12] static rate
251 				   [11:8] SL [3] C [2] E */
252 	u16 rlid;
253 	u16 vcrc;
254 };
255 
256 static const u8 mthca_opcode[] = {
257 	[IB_WR_SEND]                 = MTHCA_OPCODE_SEND,
258 	[IB_WR_SEND_WITH_IMM]        = MTHCA_OPCODE_SEND_IMM,
259 	[IB_WR_RDMA_WRITE]           = MTHCA_OPCODE_RDMA_WRITE,
260 	[IB_WR_RDMA_WRITE_WITH_IMM]  = MTHCA_OPCODE_RDMA_WRITE_IMM,
261 	[IB_WR_RDMA_READ]            = MTHCA_OPCODE_RDMA_READ,
262 	[IB_WR_ATOMIC_CMP_AND_SWP]   = MTHCA_OPCODE_ATOMIC_CS,
263 	[IB_WR_ATOMIC_FETCH_AND_ADD] = MTHCA_OPCODE_ATOMIC_FA,
264 };
265 
266 static int is_sqp(struct mthca_dev *dev, struct mthca_qp *qp)
267 {
268 	return qp->qpn >= dev->qp_table.sqp_start &&
269 		qp->qpn <= dev->qp_table.sqp_start + 3;
270 }
271 
272 static int is_qp0(struct mthca_dev *dev, struct mthca_qp *qp)
273 {
274 	return qp->qpn >= dev->qp_table.sqp_start &&
275 		qp->qpn <= dev->qp_table.sqp_start + 1;
276 }
277 
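/*
 * The QP's work queue buffer holds the receive queue first, with the
 * send queue starting at send_wqe_offset.  Small queues fit in one
 * contiguous DMA buffer (is_direct); bigger ones are built from a
 * page list, so these helpers turn a WQE index into the right page
 * and offset.
 */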
278 static void *get_recv_wqe(struct mthca_qp *qp, int n)
279 {
280 	if (qp->is_direct)
281 		return qp->queue.direct.buf + (n << qp->rq.wqe_shift);
282 	else
283 		return qp->queue.page_list[(n << qp->rq.wqe_shift) >> PAGE_SHIFT].buf +
284 			((n << qp->rq.wqe_shift) & (PAGE_SIZE - 1));
285 }
286 
287 static void *get_send_wqe(struct mthca_qp *qp, int n)
288 {
289 	if (qp->is_direct)
290 		return qp->queue.direct.buf + qp->send_wqe_offset +
291 			(n << qp->sq.wqe_shift);
292 	else
293 		return qp->queue.page_list[(qp->send_wqe_offset +
294 					    (n << qp->sq.wqe_shift)) >>
295 					   PAGE_SHIFT].buf +
296 			((qp->send_wqe_offset + (n << qp->sq.wqe_shift)) &
297 			 (PAGE_SIZE - 1));
298 }
299 
300 void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
301 		    enum ib_event_type event_type)
302 {
303 	struct mthca_qp *qp;
304 	struct ib_event event;
305 
306 	spin_lock(&dev->qp_table.lock);
307 	qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1));
308 	if (qp)
309 		atomic_inc(&qp->refcount);
310 	spin_unlock(&dev->qp_table.lock);
311 
312 	if (!qp) {
313 		mthca_warn(dev, "Async event for bogus QP %08x\n", qpn);
314 		return;
315 	}
316 
317 	event.device      = &dev->ib_dev;
318 	event.event       = event_type;
319 	event.element.qp  = &qp->ibqp;
320 	if (qp->ibqp.event_handler)
321 		qp->ibqp.event_handler(&event, qp->ibqp.qp_context);
322 
323 	if (atomic_dec_and_test(&qp->refcount))
324 		wake_up(&qp->wait);
325 }
326 
327 static int to_mthca_state(enum ib_qp_state ib_state)
328 {
329 	switch (ib_state) {
330 	case IB_QPS_RESET: return MTHCA_QP_STATE_RST;
331 	case IB_QPS_INIT:  return MTHCA_QP_STATE_INIT;
332 	case IB_QPS_RTR:   return MTHCA_QP_STATE_RTR;
333 	case IB_QPS_RTS:   return MTHCA_QP_STATE_RTS;
334 	case IB_QPS_SQD:   return MTHCA_QP_STATE_SQD;
335 	case IB_QPS_SQE:   return MTHCA_QP_STATE_SQE;
336 	case IB_QPS_ERR:   return MTHCA_QP_STATE_ERR;
337 	default:                return -1;
338 	}
339 }
340 
341 enum { RC, UC, UD, RD, RDEE, MLX, NUM_TRANS };
342 
343 static int to_mthca_st(int transport)
344 {
345 	switch (transport) {
346 	case RC:  return MTHCA_QP_ST_RC;
347 	case UC:  return MTHCA_QP_ST_UC;
348 	case UD:  return MTHCA_QP_ST_UD;
349 	case RD:  return MTHCA_QP_ST_RD;
350 	case MLX: return MTHCA_QP_ST_MLX;
351 	default:  return -1;
352 	}
353 }
354 
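/*
 * state_table[cur_state][new_state] gives the MODIFY_QP transition
 * opcode for a state change plus, per transport, which attribute
 * bits are required and which are optional for that transition.
 * mthca_modify_qp() checks the caller's attr_mask against these
 * before building the QP context.
 */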
355 static const struct {
356 	int trans;
357 	u32 req_param[NUM_TRANS];
358 	u32 opt_param[NUM_TRANS];
359 } state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
360 	[IB_QPS_RESET] = {
361 		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
362 		[IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
363 		[IB_QPS_INIT]  = {
364 			.trans = MTHCA_TRANS_RST2INIT,
365 			.req_param = {
366 				[UD]  = (IB_QP_PKEY_INDEX |
367 					 IB_QP_PORT       |
368 					 IB_QP_QKEY),
369 				[RC]  = (IB_QP_PKEY_INDEX |
370 					 IB_QP_PORT       |
371 					 IB_QP_ACCESS_FLAGS),
372 				[MLX] = (IB_QP_PKEY_INDEX |
373 					 IB_QP_QKEY),
374 			},
375 			/* bug-for-bug compatibility with VAPI: */
376 			.opt_param = {
377 				[MLX] = IB_QP_PORT
378 			}
379 		},
380 	},
381 	[IB_QPS_INIT]  = {
382 		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
383 		[IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
384 		[IB_QPS_INIT]  = {
385 			.trans = MTHCA_TRANS_INIT2INIT,
386 			.opt_param = {
387 				[UD]  = (IB_QP_PKEY_INDEX |
388 					 IB_QP_PORT       |
389 					 IB_QP_QKEY),
390 				[RC]  = (IB_QP_PKEY_INDEX |
391 					 IB_QP_PORT       |
392 					 IB_QP_ACCESS_FLAGS),
393 				[MLX] = (IB_QP_PKEY_INDEX |
394 					 IB_QP_QKEY),
395 			}
396 		},
397 		[IB_QPS_RTR]   = {
398 			.trans = MTHCA_TRANS_INIT2RTR,
399 			.req_param = {
400 				[RC]  = (IB_QP_AV                  |
401 					 IB_QP_PATH_MTU            |
402 					 IB_QP_DEST_QPN            |
403 					 IB_QP_RQ_PSN              |
404 					 IB_QP_MAX_DEST_RD_ATOMIC  |
405 					 IB_QP_MIN_RNR_TIMER),
406 			},
407 			.opt_param = {
408 				[UD]  = (IB_QP_PKEY_INDEX |
409 					 IB_QP_QKEY),
410 				[RC]  = (IB_QP_ALT_PATH     |
411 					 IB_QP_ACCESS_FLAGS |
412 					 IB_QP_PKEY_INDEX),
413 				[MLX] = (IB_QP_PKEY_INDEX |
414 					 IB_QP_QKEY),
415 			}
416 		}
417 	},
418 	[IB_QPS_RTR]   = {
419 		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
420 		[IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
421 		[IB_QPS_RTS]   = {
422 			.trans = MTHCA_TRANS_RTR2RTS,
423 			.req_param = {
424 				[UD]  = IB_QP_SQ_PSN,
425 				[RC]  = (IB_QP_TIMEOUT           |
426 					 IB_QP_RETRY_CNT         |
427 					 IB_QP_RNR_RETRY         |
428 					 IB_QP_SQ_PSN            |
429 					 IB_QP_MAX_QP_RD_ATOMIC),
430 				[MLX] = IB_QP_SQ_PSN,
431 			},
432 			.opt_param = {
433 				[UD]  = (IB_QP_CUR_STATE             |
434 					 IB_QP_QKEY),
435 				[RC]  = (IB_QP_CUR_STATE             |
436 					 IB_QP_ALT_PATH              |
437 					 IB_QP_ACCESS_FLAGS          |
438 					 IB_QP_PKEY_INDEX            |
439 					 IB_QP_MIN_RNR_TIMER         |
440 					 IB_QP_PATH_MIG_STATE),
441 				[MLX] = (IB_QP_CUR_STATE             |
442 					 IB_QP_QKEY),
443 			}
444 		}
445 	},
446 	[IB_QPS_RTS]   = {
447 		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
448 		[IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
449 		[IB_QPS_RTS]   = {
450 			.trans = MTHCA_TRANS_RTS2RTS,
451 			.opt_param = {
452 				[UD]  = (IB_QP_CUR_STATE             |
453 					 IB_QP_QKEY),
454 				[RC]  = (IB_QP_ACCESS_FLAGS          |
455 					 IB_QP_ALT_PATH              |
456 					 IB_QP_PATH_MIG_STATE        |
457 					 IB_QP_MIN_RNR_TIMER),
458 				[MLX] = (IB_QP_CUR_STATE             |
459 					 IB_QP_QKEY),
460 			}
461 		},
462 		[IB_QPS_SQD]   = {
463 			.trans = MTHCA_TRANS_RTS2SQD,
464 		},
465 	},
466 	[IB_QPS_SQD]   = {
467 		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
468 		[IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
469 		[IB_QPS_RTS]   = {
470 			.trans = MTHCA_TRANS_SQD2RTS,
471 			.opt_param = {
472 				[UD]  = (IB_QP_CUR_STATE             |
473 					 IB_QP_QKEY),
474 				[RC]  = (IB_QP_CUR_STATE             |
475 					 IB_QP_ALT_PATH              |
476 					 IB_QP_ACCESS_FLAGS          |
477 					 IB_QP_MIN_RNR_TIMER         |
478 					 IB_QP_PATH_MIG_STATE),
479 				[MLX] = (IB_QP_CUR_STATE             |
480 					 IB_QP_QKEY),
481 			}
482 		},
483 		[IB_QPS_SQD]   = {
484 			.trans = MTHCA_TRANS_SQD2SQD,
485 			.opt_param = {
486 				[UD]  = (IB_QP_PKEY_INDEX            |
487 					 IB_QP_QKEY),
488 				[RC]  = (IB_QP_AV                    |
489 					 IB_QP_TIMEOUT               |
490 					 IB_QP_RETRY_CNT             |
491 					 IB_QP_RNR_RETRY             |
492 					 IB_QP_MAX_QP_RD_ATOMIC      |
493 					 IB_QP_MAX_DEST_RD_ATOMIC    |
494 					 IB_QP_CUR_STATE             |
495 					 IB_QP_ALT_PATH              |
496 					 IB_QP_ACCESS_FLAGS          |
497 					 IB_QP_PKEY_INDEX            |
498 					 IB_QP_MIN_RNR_TIMER         |
499 					 IB_QP_PATH_MIG_STATE),
500 				[MLX] = (IB_QP_PKEY_INDEX            |
501 					 IB_QP_QKEY),
502 			}
503 		}
504 	},
505 	[IB_QPS_SQE]   = {
506 		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
507 		[IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
508 		[IB_QPS_RTS]   = {
509 			.trans = MTHCA_TRANS_SQERR2RTS,
510 			.opt_param = {
511 				[UD]  = (IB_QP_CUR_STATE             |
512 					 IB_QP_QKEY),
513 				[RC]  = (IB_QP_CUR_STATE             |
514 					 IB_QP_MIN_RNR_TIMER),
515 				[MLX] = (IB_QP_CUR_STATE             |
516 					 IB_QP_QKEY),
517 			}
518 		}
519 	},
520 	[IB_QPS_ERR] = {
521 		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
522 		[IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }
523 	}
524 };
525 
526 static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr,
527 			int attr_mask)
528 {
529 	if (attr_mask & IB_QP_PKEY_INDEX)
530 		sqp->pkey_index = attr->pkey_index;
531 	if (attr_mask & IB_QP_QKEY)
532 		sqp->qkey = attr->qkey;
533 	if (attr_mask & IB_QP_SQ_PSN)
534 		sqp->send_psn = attr->sq_psn;
535 }
536 
537 static void init_port(struct mthca_dev *dev, int port)
538 {
539 	int err;
540 	u8 status;
541 	struct mthca_init_ib_param param;
542 
543 	memset(&param, 0, sizeof param);
544 
545 	param.enable_1x = 1;
546 	param.enable_4x = 1;
547 	param.vl_cap    = dev->limits.vl_cap;
548 	param.mtu_cap   = dev->limits.mtu_cap;
549 	param.gid_cap   = dev->limits.gid_table_len;
550 	param.pkey_cap  = dev->limits.pkey_table_len;
551 
552 	err = mthca_INIT_IB(dev, &param, port, &status);
553 	if (err)
554 		mthca_warn(dev, "INIT_IB failed, return code %d.\n", err);
555 	if (status)
556 		mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
557 }
558 
559 int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
560 {
561 	struct mthca_dev *dev = to_mdev(ibqp->device);
562 	struct mthca_qp *qp = to_mqp(ibqp);
563 	enum ib_qp_state cur_state, new_state;
564 	void *mailbox = NULL;
565 	struct mthca_qp_param *qp_param;
566 	struct mthca_qp_context *qp_context;
567 	u32 req_param, opt_param;
568 	u8 status;
569 	int err;
570 
571 	if (attr_mask & IB_QP_CUR_STATE) {
572 		if (attr->cur_qp_state != IB_QPS_RTR &&
573 		    attr->cur_qp_state != IB_QPS_RTS &&
574 		    attr->cur_qp_state != IB_QPS_SQD &&
575 		    attr->cur_qp_state != IB_QPS_SQE)
576 			return -EINVAL;
577 		else
578 			cur_state = attr->cur_qp_state;
579 	} else {
580 		spin_lock_irq(&qp->sq.lock);
581 		spin_lock(&qp->rq.lock);
582 		cur_state = qp->state;
583 		spin_unlock(&qp->rq.lock);
584 		spin_unlock_irq(&qp->sq.lock);
585 	}
586 
587 	if (attr_mask & IB_QP_STATE) {
588 		if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR)
589 			return -EINVAL;
590 		new_state = attr->qp_state;
591 	} else
592 		new_state = cur_state;
593 
594 	if (state_table[cur_state][new_state].trans == MTHCA_TRANS_INVALID) {
595 		mthca_dbg(dev, "Illegal QP transition "
596 			  "%d->%d\n", cur_state, new_state);
597 		return -EINVAL;
598 	}
599 
600 	req_param = state_table[cur_state][new_state].req_param[qp->transport];
601 	opt_param = state_table[cur_state][new_state].opt_param[qp->transport];
602 
603 	if ((req_param & attr_mask) != req_param) {
604 		mthca_dbg(dev, "QP transition "
605 			  "%d->%d missing req attr 0x%08x\n",
606 			  cur_state, new_state,
607 			  req_param & ~attr_mask);
608 		return -EINVAL;
609 	}
610 
611 	if (attr_mask & ~(req_param | opt_param | IB_QP_STATE)) {
612 		mthca_dbg(dev, "QP transition (transport %d) "
613 			  "%d->%d has extra attr 0x%08x\n",
614 			  qp->transport,
615 			  cur_state, new_state,
616 			  attr_mask & ~(req_param | opt_param |
617 						 IB_QP_STATE));
618 		return -EINVAL;
619 	}
620 
621 	mailbox = kmalloc(sizeof (*qp_param) + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
622 	if (!mailbox)
623 		return -ENOMEM;
624 	qp_param = MAILBOX_ALIGN(mailbox);
625 	qp_context = &qp_param->context;
626 	memset(qp_param, 0, sizeof *qp_param);
627 
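	/*
	 * Build the context flags word; judging from the shifts used
	 * here, bits [31:28] hold the next QP state, [18:16] the
	 * service type, [12:11] the path migration state and bit 8
	 * the DE flag.
	 */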
628 	qp_context->flags      = cpu_to_be32((to_mthca_state(new_state) << 28) |
629 					     (to_mthca_st(qp->transport) << 16));
630 	qp_context->flags     |= cpu_to_be32(MTHCA_QP_BIT_DE);
631 	if (!(attr_mask & IB_QP_PATH_MIG_STATE))
632 		qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
633 	else {
634 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PM_STATE);
635 		switch (attr->path_mig_state) {
636 		case IB_MIG_MIGRATED:
637 			qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
638 			break;
639 		case IB_MIG_REARM:
640 			qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_REARM << 11);
641 			break;
642 		case IB_MIG_ARMED:
643 			qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_ARMED << 11);
644 			break;
645 		}
646 	}
647 
648 	/* leave tavor_sched_queue as 0 */
649 
650 	if (qp->transport == MLX || qp->transport == UD)
651 		qp_context->mtu_msgmax = (IB_MTU_2048 << 5) | 11;
652 	else if (attr_mask & IB_QP_PATH_MTU)
653 		qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31;
654 
655 	if (dev->hca_type == ARBEL_NATIVE) {
656 		qp_context->rq_size_stride =
657 			((ffs(qp->rq.max) - 1) << 3) | (qp->rq.wqe_shift - 4);
658 		qp_context->sq_size_stride =
659 			((ffs(qp->sq.max) - 1) << 3) | (qp->sq.wqe_shift - 4);
660 	}
661 
662 	/* leave arbel_sched_queue as 0 */
663 
664 	qp_context->usr_page   = cpu_to_be32(dev->driver_uar.index);
665 	qp_context->local_qpn  = cpu_to_be32(qp->qpn);
666 	if (attr_mask & IB_QP_DEST_QPN) {
667 		qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
668 	}
669 
670 	if (qp->transport == MLX)
671 		qp_context->pri_path.port_pkey |=
672 			cpu_to_be32(to_msqp(qp)->port << 24);
673 	else {
674 		if (attr_mask & IB_QP_PORT) {
675 			qp_context->pri_path.port_pkey |=
676 				cpu_to_be32(attr->port_num << 24);
677 			qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PORT_NUM);
678 		}
679 	}
680 
681 	if (attr_mask & IB_QP_PKEY_INDEX) {
682 		qp_context->pri_path.port_pkey |=
683 			cpu_to_be32(attr->pkey_index);
684 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PKEY_INDEX);
685 	}
686 
687 	if (attr_mask & IB_QP_RNR_RETRY) {
688 		qp_context->pri_path.rnr_retry = attr->rnr_retry << 5;
689 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY);
690 	}
691 
692 	if (attr_mask & IB_QP_AV) {
693 		qp_context->pri_path.g_mylmc     = attr->ah_attr.src_path_bits & 0x7f;
694 		qp_context->pri_path.rlid        = cpu_to_be16(attr->ah_attr.dlid);
695 		qp_context->pri_path.static_rate = (!!attr->ah_attr.static_rate) << 3;
696 		if (attr->ah_attr.ah_flags & IB_AH_GRH) {
697 			qp_context->pri_path.g_mylmc |= 1 << 7;
698 			qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index;
699 			qp_context->pri_path.hop_limit = attr->ah_attr.grh.hop_limit;
700 			qp_context->pri_path.sl_tclass_flowlabel =
701 				cpu_to_be32((attr->ah_attr.sl << 28)                |
702 					    (attr->ah_attr.grh.traffic_class << 20) |
703 					    (attr->ah_attr.grh.flow_label));
704 			memcpy(qp_context->pri_path.rgid,
705 			       attr->ah_attr.grh.dgid.raw, 16);
706 		} else {
707 			qp_context->pri_path.sl_tclass_flowlabel =
708 				cpu_to_be32(attr->ah_attr.sl << 28);
709 		}
710 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
711 	}
712 
713 	if (attr_mask & IB_QP_TIMEOUT) {
714 		qp_context->pri_path.ackto = attr->timeout;
715 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);
716 	}
717 
718 	/* XXX alt_path */
719 
720 	/* leave rdd as 0 */
721 	qp_context->pd         = cpu_to_be32(to_mpd(ibqp->pd)->pd_num);
722 	/* leave wqe_base as 0 (we always create an MR based at 0 for WQs) */
723 	qp_context->wqe_lkey   = cpu_to_be32(qp->mr.ibmr.lkey);
724 	qp_context->params1    = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) |
725 					     (MTHCA_FLIGHT_LIMIT << 24) |
726 					     MTHCA_QP_BIT_SRE           |
727 					     MTHCA_QP_BIT_SWE           |
728 					     MTHCA_QP_BIT_SAE);
729 	if (qp->sq_policy == IB_SIGNAL_ALL_WR)
730 		qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC);
731 	if (attr_mask & IB_QP_RETRY_CNT) {
732 		qp_context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
733 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT);
734 	}
735 
736 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
737 		qp_context->params1 |= cpu_to_be32(min(attr->max_dest_rd_atomic ?
738 						       ffs(attr->max_dest_rd_atomic) - 1 : 0,
739 						       7) << 21);
740 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
741 	}
742 
743 	if (attr_mask & IB_QP_SQ_PSN)
744 		qp_context->next_send_psn = cpu_to_be32(attr->sq_psn);
745 	qp_context->cqn_snd = cpu_to_be32(to_mcq(ibqp->send_cq)->cqn);
746 
747 	if (dev->hca_type == ARBEL_NATIVE) {
748 		qp_context->snd_wqe_base_l = cpu_to_be32(qp->send_wqe_offset);
749 		qp_context->snd_db_index   = cpu_to_be32(qp->sq.db_index);
750 	}
751 
752 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
753 		/*
754 		 * Only enable RDMA/atomics if we have responder
755 		 * resources set to a non-zero value.
756 		 */
757 		if (qp->resp_depth) {
758 			qp_context->params2 |=
759 				cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE ?
760 					    MTHCA_QP_BIT_RWE : 0);
761 			qp_context->params2 |=
762 				cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_READ ?
763 					    MTHCA_QP_BIT_RRE : 0);
764 			qp_context->params2 |=
765 				cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC ?
766 					    MTHCA_QP_BIT_RAE : 0);
767 		}
768 
769 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
770 							MTHCA_QP_OPTPAR_RRE |
771 							MTHCA_QP_OPTPAR_RAE);
772 
773 		qp->atomic_rd_en = attr->qp_access_flags;
774 	}
775 
776 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
777 		u8 rra_max;
778 
779 		if (qp->resp_depth && !attr->max_rd_atomic) {
780 			/*
781 			 * Lowering our responder resources to zero.
782 			 * Turn off RDMA/atomics as responder.
783 			 * (RWE/RRE/RAE in params2 already zero)
784 			 */
785 			qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
786 								MTHCA_QP_OPTPAR_RRE |
787 								MTHCA_QP_OPTPAR_RAE);
788 		}
789 
790 		if (!qp->resp_depth && attr->max_rd_atomic) {
791 			/*
792 			 * Increasing our responder resources from
793 			 * zero.  Turn on RDMA/atomics as appropriate.
794 			 */
795 			qp_context->params2 |=
796 				cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_WRITE ?
797 					    MTHCA_QP_BIT_RWE : 0);
798 			qp_context->params2 |=
799 				cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_READ ?
800 					    MTHCA_QP_BIT_RRE : 0);
801 			qp_context->params2 |=
802 				cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_ATOMIC ?
803 					    MTHCA_QP_BIT_RAE : 0);
804 
805 			qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
806 								MTHCA_QP_OPTPAR_RRE |
807 								MTHCA_QP_OPTPAR_RAE);
808 		}
809 
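		/*
		 * The HCA takes the responder resource limit as a
		 * log2 value, so round max_rd_atomic up to the next
		 * power of two, capped by rdb_shift (which bounds how
		 * many RDB entries each QP gets).
		 */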
810 		for (rra_max = 0;
811 		     1 << rra_max < attr->max_rd_atomic &&
812 			     rra_max < dev->qp_table.rdb_shift;
813 		     ++rra_max)
814 			; /* nothing */
815 
816 		qp_context->params2      |= cpu_to_be32(rra_max << 21);
817 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);
818 
819 		qp->resp_depth = attr->max_rd_atomic;
820 	}
821 
822 	qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);
823 
824 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
825 		qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
826 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT);
827 	}
828 	if (attr_mask & IB_QP_RQ_PSN)
829 		qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
830 
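	/*
	 * Point the QP at its slice of the RDB table, which appears
	 * to hold the responder-side state for in-flight RDMA read
	 * and atomic requests: each QP gets 2^rdb_shift entries of
	 * MTHCA_RDB_ENTRY_SIZE bytes starting at rdb_base.
	 */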
831 	qp_context->ra_buff_indx =
832 		cpu_to_be32(dev->qp_table.rdb_base +
833 			    ((qp->qpn & (dev->limits.num_qps - 1)) * MTHCA_RDB_ENTRY_SIZE <<
834 			     dev->qp_table.rdb_shift));
835 
836 	qp_context->cqn_rcv = cpu_to_be32(to_mcq(ibqp->recv_cq)->cqn);
837 
838 	if (dev->hca_type == ARBEL_NATIVE)
839 		qp_context->rcv_db_index   = cpu_to_be32(qp->rq.db_index);
840 
841 	if (attr_mask & IB_QP_QKEY) {
842 		qp_context->qkey = cpu_to_be32(attr->qkey);
843 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY);
844 	}
845 
846 	err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans,
847 			      qp->qpn, 0, qp_param, 0, &status);
848 	if (status) {
849 		mthca_warn(dev, "modify QP %d returned status %02x.\n",
850 			   state_table[cur_state][new_state].trans, status);
851 		err = -EINVAL;
852 	}
853 
854 	if (!err)
855 		qp->state = new_state;
856 
857 	kfree(mailbox);
858 
859 	if (is_sqp(dev, qp))
860 		store_attrs(to_msqp(qp), attr, attr_mask);
861 
862 	/*
863 	 * If we are moving QP0 to RTR, bring the IB link up; if we
864 	 * are moving QP0 to RESET or ERROR, bring the link back down.
865 	 */
866 	if (is_qp0(dev, qp)) {
867 		if (cur_state != IB_QPS_RTR &&
868 		    new_state == IB_QPS_RTR)
869 			init_port(dev, to_msqp(qp)->port);
870 
871 		if (cur_state != IB_QPS_RESET &&
872 		    cur_state != IB_QPS_ERR &&
873 		    (new_state == IB_QPS_RESET ||
874 		     new_state == IB_QPS_ERR))
875 			mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status);
876 	}
877 
878 	return err;
879 }
880 
881 /*
882  * Allocate and register buffer for WQEs.  qp->rq.max, sq.max,
883  * rq.max_gs and sq.max_gs must all be assigned.
884  * mthca_alloc_wqe_buf will calculate rq.wqe_shift and
885  * sq.wqe_shift (as well as send_wqe_offset, is_direct, and
886  * queue).
887  */
888 static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
889 			       struct mthca_pd *pd,
890 			       struct mthca_qp *qp)
891 {
892 	int size;
893 	int i;
894 	int npages, shift;
895 	dma_addr_t t;
896 	u64 *dma_list = NULL;
897 	int err = -ENOMEM;
898 
899 	size = sizeof (struct mthca_next_seg) +
900 		qp->rq.max_gs * sizeof (struct mthca_data_seg);
901 
902 	for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
903 	     qp->rq.wqe_shift++)
904 		; /* nothing */
905 
906 	size = sizeof (struct mthca_next_seg) +
907 		qp->sq.max_gs * sizeof (struct mthca_data_seg);
908 	switch (qp->transport) {
909 	case MLX:
910 		size += 2 * sizeof (struct mthca_data_seg);
911 		break;
912 	case UD:
913 		if (dev->hca_type == ARBEL_NATIVE)
914 			size += sizeof (struct mthca_arbel_ud_seg);
915 		else
916 			size += sizeof (struct mthca_tavor_ud_seg);
917 		break;
918 	default:
919 		/* bind seg is as big as atomic + raddr segs */
920 		size += sizeof (struct mthca_bind_seg);
921 	}
922 
923 	for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
924 	     qp->sq.wqe_shift++)
925 		; /* nothing */
926 
927 	qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
928 				    1 << qp->sq.wqe_shift);
929 	size = PAGE_ALIGN(qp->send_wqe_offset +
930 			  (qp->sq.max << qp->sq.wqe_shift));
931 
932 	qp->wrid = kmalloc((qp->rq.max + qp->sq.max) * sizeof (u64),
933 			   GFP_KERNEL);
934 	if (!qp->wrid)
935 		goto err_out;
936 
937 	if (size <= MTHCA_MAX_DIRECT_QP_SIZE) {
938 		qp->is_direct = 1;
939 		npages = 1;
940 		shift = get_order(size) + PAGE_SHIFT;
941 
942 		if (0)
943 			mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n",
944 				  size, shift);
945 
946 		qp->queue.direct.buf = pci_alloc_consistent(dev->pdev, size, &t);
947 		if (!qp->queue.direct.buf)
948 			goto err_out;
949 
950 		pci_unmap_addr_set(&qp->queue.direct, mapping, t);
951 
952 		memset(qp->queue.direct.buf, 0, size);
953 
954 		while (t & ((1 << shift) - 1)) {
955 			--shift;
956 			npages *= 2;
957 		}
958 
959 		dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
960 		if (!dma_list)
961 			goto err_out_free;
962 
963 		for (i = 0; i < npages; ++i)
964 			dma_list[i] = t + i * (1 << shift);
965 	} else {
966 		qp->is_direct = 0;
967 		npages = size / PAGE_SIZE;
968 		shift = PAGE_SHIFT;
969 
970 		if (0)
971 			mthca_dbg(dev, "Creating indirect QP with %d pages\n", npages);
972 
973 		dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
974 		if (!dma_list)
975 			goto err_out;
976 
977 		qp->queue.page_list = kmalloc(npages *
978 					      sizeof *qp->queue.page_list,
979 					      GFP_KERNEL);
980 		if (!qp->queue.page_list)
981 			goto err_out;
982 
983 		for (i = 0; i < npages; ++i) {
984 			qp->queue.page_list[i].buf =
985 				pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t);
986 			if (!qp->queue.page_list[i].buf)
987 				goto err_out_free;
988 
989 			memset(qp->queue.page_list[i].buf, 0, PAGE_SIZE);
990 
991 			pci_unmap_addr_set(&qp->queue.page_list[i], mapping, t);
992 			dma_list[i] = t;
993 		}
994 	}
995 
996 	err = mthca_mr_alloc_phys(dev, pd->pd_num, dma_list, shift,
997 				  npages, 0, size,
998 				  MTHCA_MPT_FLAG_LOCAL_READ,
999 				  &qp->mr);
1000 	if (err)
1001 		goto err_out_free;
1002 
1003 	kfree(dma_list);
1004 	return 0;
1005 
1006  err_out_free:
1007 	if (qp->is_direct) {
1008 		pci_free_consistent(dev->pdev, size,
1009 				    qp->queue.direct.buf,
1010 				    pci_unmap_addr(&qp->queue.direct, mapping));
1011 	} else
1012 		for (i = 0; i < npages; ++i) {
1013 			if (qp->queue.page_list[i].buf)
1014 				pci_free_consistent(dev->pdev, PAGE_SIZE,
1015 						    qp->queue.page_list[i].buf,
1016 						    pci_unmap_addr(&qp->queue.page_list[i],
1017 								   mapping));
1018 
1019 		}
1020 
1021  err_out:
1022 	kfree(qp->wrid);
1023 	kfree(dma_list);
1024 	return err;
1025 }
1026 
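/*
 * Mem-free (Arbel native) HCAs keep QP context in host memory, so
 * before the QP can be used we need to pin its entries in the
 * qp_table and eqp_table context tables and allocate doorbell
 * records for the receive and send queues.
 */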
1027 static int mthca_alloc_memfree(struct mthca_dev *dev,
1028 			       struct mthca_qp *qp)
1029 {
1030 	int ret = 0;
1031 
1032 	if (dev->hca_type == ARBEL_NATIVE) {
1033 		ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
1034 		if (ret)
1035 			return ret;
1036 
1037 		ret = mthca_table_get(dev, dev->qp_table.eqp_table, qp->qpn);
1038 		if (ret)
1039 			goto err_qpc;
1040 
1041 		qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
1042 						 qp->qpn, &qp->rq.db);
1043 		if (qp->rq.db_index < 0) {
1044 			ret = -ENOMEM;
1045 			goto err_eqpc;
1046 		}
1047 
1048 		qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
1049 						 qp->qpn, &qp->sq.db);
1050 		if (qp->sq.db_index < 0) {
1051 			ret = -ENOMEM;
1052 			goto err_rq_db;
1053 		}
1054 	}
1055 
1056 	return 0;
1057 
1058 err_rq_db:
1059 	mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
1060 
1061 err_eqpc:
1062 	mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
1063 
1064 err_qpc:
1065 	mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
1066 
1067 	return ret;
1068 }
1069 
1070 static void mthca_free_memfree(struct mthca_dev *dev,
1071 			       struct mthca_qp *qp)
1072 {
1073 	if (dev->hca_type == ARBEL_NATIVE) {
1074 		mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
1075 		mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
1076 		mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
1077 		mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
1078 	}
1079 }
1080 
1081 static void mthca_wq_init(struct mthca_wq* wq)
1082 {
1083 	spin_lock_init(&wq->lock);
1084 	wq->next_ind  = 0;
1085 	wq->last_comp = wq->max - 1;
1086 	wq->head      = 0;
1087 	wq->tail      = 0;
1088 	wq->last      = NULL;
1089 }
1090 
1091 static int mthca_alloc_qp_common(struct mthca_dev *dev,
1092 				 struct mthca_pd *pd,
1093 				 struct mthca_cq *send_cq,
1094 				 struct mthca_cq *recv_cq,
1095 				 enum ib_sig_type send_policy,
1096 				 struct mthca_qp *qp)
1097 {
1098 	struct mthca_next_seg *wqe;
1099 	int ret;
1100 	int i;
1101 
1102 	atomic_set(&qp->refcount, 1);
1103 	qp->state    	 = IB_QPS_RESET;
1104 	qp->atomic_rd_en = 0;
1105 	qp->resp_depth   = 0;
1106 	qp->sq_policy    = send_policy;
1107 	mthca_wq_init(&qp->sq);
1108 	mthca_wq_init(&qp->rq);
1109 
1110 	ret = mthca_alloc_memfree(dev, qp);
1111 	if (ret)
1112 		return ret;
1113 
1114 	ret = mthca_alloc_wqe_buf(dev, pd, qp);
1115 	if (ret) {
1116 		mthca_free_memfree(dev, qp);
1117 		return ret;
1118 	}
1119 
1120 	if (dev->hca_type == ARBEL_NATIVE) {
1121 		for (i = 0; i < qp->rq.max; ++i) {
1122 			wqe = get_recv_wqe(qp, i);
1123 			wqe->nda_op = cpu_to_be32(((i + 1) & (qp->rq.max - 1)) <<
1124 						  qp->rq.wqe_shift);
1125 			wqe->ee_nds = cpu_to_be32(1 << (qp->rq.wqe_shift - 4));
1126 		}
1127 
1128 		for (i = 0; i < qp->sq.max; ++i) {
1129 			wqe = get_send_wqe(qp, i);
1130 			wqe->nda_op = cpu_to_be32((((i + 1) & (qp->sq.max - 1)) <<
1131 						   qp->sq.wqe_shift) +
1132 						  qp->send_wqe_offset);
1133 		}
1134 	}
1135 
1136 	return 0;
1137 }
1138 
1139 static void mthca_align_qp_size(struct mthca_dev *dev, struct mthca_qp *qp)
1140 {
1141 	int i;
1142 
1143 	if (dev->hca_type != ARBEL_NATIVE)
1144 		return;
1145 
1146 	for (i = 0; 1 << i < qp->rq.max; ++i)
1147 		; /* nothing */
1148 
1149 	qp->rq.max = 1 << i;
1150 
1151 	for (i = 0; 1 << i < qp->sq.max; ++i)
1152 		; /* nothing */
1153 
1154 	qp->sq.max = 1 << i;
1155 }
1156 
1157 int mthca_alloc_qp(struct mthca_dev *dev,
1158 		   struct mthca_pd *pd,
1159 		   struct mthca_cq *send_cq,
1160 		   struct mthca_cq *recv_cq,
1161 		   enum ib_qp_type type,
1162 		   enum ib_sig_type send_policy,
1163 		   struct mthca_qp *qp)
1164 {
1165 	int err;
1166 
1167 	mthca_align_qp_size(dev, qp);
1168 
1169 	switch (type) {
1170 	case IB_QPT_RC: qp->transport = RC; break;
1171 	case IB_QPT_UC: qp->transport = UC; break;
1172 	case IB_QPT_UD: qp->transport = UD; break;
1173 	default: return -EINVAL;
1174 	}
1175 
1176 	qp->qpn = mthca_alloc(&dev->qp_table.alloc);
1177 	if (qp->qpn == -1)
1178 		return -ENOMEM;
1179 
1180 	err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
1181 				    send_policy, qp);
1182 	if (err) {
1183 		mthca_free(&dev->qp_table.alloc, qp->qpn);
1184 		return err;
1185 	}
1186 
1187 	spin_lock_irq(&dev->qp_table.lock);
1188 	mthca_array_set(&dev->qp_table.qp,
1189 			qp->qpn & (dev->limits.num_qps - 1), qp);
1190 	spin_unlock_irq(&dev->qp_table.lock);
1191 
1192 	return 0;
1193 }
1194 
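/*
 * The four special QPs sit just above the reserved range: sqp_start
 * and sqp_start + 1 are QP0 for ports 1 and 2, sqp_start + 2 and + 3
 * are QP1.  The mqpn calculation below encodes that, with qpn
 * presumably 0 for the SMI QP and 1 for the GSI QP.
 */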
1195 int mthca_alloc_sqp(struct mthca_dev *dev,
1196 		    struct mthca_pd *pd,
1197 		    struct mthca_cq *send_cq,
1198 		    struct mthca_cq *recv_cq,
1199 		    enum ib_sig_type send_policy,
1200 		    int qpn,
1201 		    int port,
1202 		    struct mthca_sqp *sqp)
1203 {
1204 	int err = 0;
1205 	u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
1206 
1207 	mthca_align_qp_size(dev, &sqp->qp);
1208 
1209 	sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
1210 	sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
1211 					     &sqp->header_dma, GFP_KERNEL);
1212 	if (!sqp->header_buf)
1213 		return -ENOMEM;
1214 
1215 	spin_lock_irq(&dev->qp_table.lock);
1216 	if (mthca_array_get(&dev->qp_table.qp, mqpn))
1217 		err = -EBUSY;
1218 	else
1219 		mthca_array_set(&dev->qp_table.qp, mqpn, sqp);
1220 	spin_unlock_irq(&dev->qp_table.lock);
1221 
1222 	if (err)
1223 		goto err_out;
1224 
1225 	sqp->port = port;
1226 	sqp->qp.qpn       = mqpn;
1227 	sqp->qp.transport = MLX;
1228 
1229 	err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
1230 				    send_policy, &sqp->qp);
1231 	if (err)
1232 		goto err_out_free;
1233 
1234 	atomic_inc(&pd->sqp_count);
1235 
1236 	return 0;
1237 
1238  err_out_free:
1239 	/*
1240 	 * Lock CQs here, so that CQ polling code can do QP lookup
1241 	 * without taking a lock.
1242 	 */
1243 	spin_lock_irq(&send_cq->lock);
1244 	if (send_cq != recv_cq)
1245 		spin_lock(&recv_cq->lock);
1246 
1247 	spin_lock(&dev->qp_table.lock);
1248 	mthca_array_clear(&dev->qp_table.qp, mqpn);
1249 	spin_unlock(&dev->qp_table.lock);
1250 
1251 	if (send_cq != recv_cq)
1252 		spin_unlock(&recv_cq->lock);
1253 	spin_unlock_irq(&send_cq->lock);
1254 
1255  err_out:
1256 	dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
1257 			  sqp->header_buf, sqp->header_dma);
1258 
1259 	return err;
1260 }
1261 
1262 void mthca_free_qp(struct mthca_dev *dev,
1263 		   struct mthca_qp *qp)
1264 {
1265 	u8 status;
1266 	int size;
1267 	int i;
1268 	struct mthca_cq *send_cq;
1269 	struct mthca_cq *recv_cq;
1270 
1271 	send_cq = to_mcq(qp->ibqp.send_cq);
1272 	recv_cq = to_mcq(qp->ibqp.recv_cq);
1273 
1274 	/*
1275 	 * Lock CQs here, so that CQ polling code can do QP lookup
1276 	 * without taking a lock.
1277 	 */
1278 	spin_lock_irq(&send_cq->lock);
1279 	if (send_cq != recv_cq)
1280 		spin_lock(&recv_cq->lock);
1281 
1282 	spin_lock(&dev->qp_table.lock);
1283 	mthca_array_clear(&dev->qp_table.qp,
1284 			  qp->qpn & (dev->limits.num_qps - 1));
1285 	spin_unlock(&dev->qp_table.lock);
1286 
1287 	if (send_cq != recv_cq)
1288 		spin_unlock(&recv_cq->lock);
1289 	spin_unlock_irq(&send_cq->lock);
1290 
1291 	atomic_dec(&qp->refcount);
1292 	wait_event(qp->wait, !atomic_read(&qp->refcount));
1293 
1294 	if (qp->state != IB_QPS_RESET)
1295 		mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status);
1296 
1297 	mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn);
1298 	if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
1299 		mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn);
1300 
1301 	mthca_free_mr(dev, &qp->mr);
1302 
1303 	size = PAGE_ALIGN(qp->send_wqe_offset +
1304 			  (qp->sq.max << qp->sq.wqe_shift));
1305 
1306 	if (qp->is_direct) {
1307 		pci_free_consistent(dev->pdev, size,
1308 				    qp->queue.direct.buf,
1309 				    pci_unmap_addr(&qp->queue.direct, mapping));
1310 	} else {
1311 		for (i = 0; i < size / PAGE_SIZE; ++i) {
1312 			pci_free_consistent(dev->pdev, PAGE_SIZE,
1313 					    qp->queue.page_list[i].buf,
1314 					    pci_unmap_addr(&qp->queue.page_list[i],
1315 							   mapping));
1316 		}
1317 	}
1318 
1319 	kfree(qp->wrid);
1320 
1321 	mthca_free_memfree(dev, qp);
1322 
1323 	if (is_sqp(dev, qp)) {
1324 		atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
1325 		dma_free_coherent(&dev->pdev->dev,
1326 				  to_msqp(qp)->header_buf_size,
1327 				  to_msqp(qp)->header_buf,
1328 				  to_msqp(qp)->header_dma);
1329 	} else
1330 		mthca_free(&dev->qp_table.alloc, qp->qpn);
1331 }
1332 
1333 /* Create UD header for an MLX send and build a data segment for it */
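/*
 * The MLX transport is what the special QPs use: the HCA sends the
 * packet more or less raw, so the LRH/BTH/DETH headers are built in
 * software with the ib_ud_header helpers into header_buf and handed
 * to the hardware through an extra gather entry.
 */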
1334 static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
1335 			    int ind, struct ib_send_wr *wr,
1336 			    struct mthca_mlx_seg *mlx,
1337 			    struct mthca_data_seg *data)
1338 {
1339 	int header_size;
1340 	int err;
1341 
1342 	ib_ud_header_init(256, /* assume a MAD */
1343 			  sqp->ud_header.grh_present,
1344 			  &sqp->ud_header);
1345 
1346 	err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header);
1347 	if (err)
1348 		return err;
1349 	mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
1350 	mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
1351 				  (sqp->ud_header.lrh.destination_lid == 0xffff ?
1352 				   MTHCA_MLX_SLR : 0) |
1353 				  (sqp->ud_header.lrh.service_level << 8));
1354 	mlx->rlid = sqp->ud_header.lrh.destination_lid;
1355 	mlx->vcrc = 0;
1356 
1357 	switch (wr->opcode) {
1358 	case IB_WR_SEND:
1359 		sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
1360 		sqp->ud_header.immediate_present = 0;
1361 		break;
1362 	case IB_WR_SEND_WITH_IMM:
1363 		sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
1364 		sqp->ud_header.immediate_present = 1;
1365 		sqp->ud_header.immediate_data = wr->imm_data;
1366 		break;
1367 	default:
1368 		return -EINVAL;
1369 	}
1370 
1371 	sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;
1372 	if (sqp->ud_header.lrh.destination_lid == 0xffff)
1373 		sqp->ud_header.lrh.source_lid = 0xffff;
1374 	sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
1375 	if (!sqp->qp.ibqp.qp_num)
1376 		ib_get_cached_pkey(&dev->ib_dev, sqp->port,
1377 				   sqp->pkey_index,
1378 				   &sqp->ud_header.bth.pkey);
1379 	else
1380 		ib_get_cached_pkey(&dev->ib_dev, sqp->port,
1381 				   wr->wr.ud.pkey_index,
1382 				   &sqp->ud_header.bth.pkey);
1383 	cpu_to_be16s(&sqp->ud_header.bth.pkey);
1384 	sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
1385 	sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
1386 	sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
1387 					       sqp->qkey : wr->wr.ud.remote_qkey);
1388 	sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
1389 
1390 	header_size = ib_ud_header_pack(&sqp->ud_header,
1391 					sqp->header_buf +
1392 					ind * MTHCA_UD_HEADER_SIZE);
1393 
1394 	data->byte_count = cpu_to_be32(header_size);
1395 	data->lkey       = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey);
1396 	data->addr       = cpu_to_be64(sqp->header_dma +
1397 				       ind * MTHCA_UD_HEADER_SIZE);
1398 
1399 	return 0;
1400 }
1401 
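/*
 * wq->head is advanced by the post verbs below while wq->tail is
 * advanced by the completion path, which runs under the CQ lock.  A
 * racy first check is fine for the common case; if the queue looks
 * full we retake the measurement under the CQ lock before declaring
 * overflow.
 */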
1402 static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
1403 				    struct ib_cq *ib_cq)
1404 {
1405 	unsigned cur;
1406 	struct mthca_cq *cq;
1407 
1408 	cur = wq->head - wq->tail;
1409 	if (likely(cur + nreq < wq->max))
1410 		return 0;
1411 
1412 	cq = to_mcq(ib_cq);
1413 	spin_lock(&cq->lock);
1414 	cur = wq->head - wq->tail;
1415 	spin_unlock(&cq->lock);
1416 
1417 	return cur + nreq >= wq->max;
1418 }
1419 
1420 int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1421 			  struct ib_send_wr **bad_wr)
1422 {
1423 	struct mthca_dev *dev = to_mdev(ibqp->device);
1424 	struct mthca_qp *qp = to_mqp(ibqp);
1425 	void *wqe;
1426 	void *prev_wqe;
1427 	unsigned long flags;
1428 	int err = 0;
1429 	int nreq;
1430 	int i;
1431 	int size;
1432 	int size0 = 0;
1433 	u32 f0 = 0;
1434 	int ind;
1435 	u8 op0 = 0;
1436 
1437 	spin_lock_irqsave(&qp->sq.lock, flags);
1438 
1439 	/* XXX check that state is OK to post send */
1440 
1441 	ind = qp->sq.next_ind;
1442 
1443 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
1444 		if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
1445 			mthca_err(dev, "SQ %06x full (%u head, %u tail,"
1446 					" %d max, %d nreq)\n", qp->qpn,
1447 					qp->sq.head, qp->sq.tail,
1448 					qp->sq.max, nreq);
1449 			err = -ENOMEM;
1450 			*bad_wr = wr;
1451 			goto out;
1452 		}
1453 
1454 		wqe = get_send_wqe(qp, ind);
1455 		prev_wqe = qp->sq.last;
1456 		qp->sq.last = wqe;
1457 
1458 		((struct mthca_next_seg *) wqe)->nda_op = 0;
1459 		((struct mthca_next_seg *) wqe)->ee_nds = 0;
1460 		((struct mthca_next_seg *) wqe)->flags =
1461 			((wr->send_flags & IB_SEND_SIGNALED) ?
1462 			 cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
1463 			((wr->send_flags & IB_SEND_SOLICITED) ?
1464 			 cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0)   |
1465 			cpu_to_be32(1);
1466 		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
1467 		    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
1468 			((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
1469 
1470 		wqe += sizeof (struct mthca_next_seg);
1471 		size = sizeof (struct mthca_next_seg) / 16;
1472 
1473 		switch (qp->transport) {
1474 		case RC:
1475 			switch (wr->opcode) {
1476 			case IB_WR_ATOMIC_CMP_AND_SWP:
1477 			case IB_WR_ATOMIC_FETCH_AND_ADD:
1478 				((struct mthca_raddr_seg *) wqe)->raddr =
1479 					cpu_to_be64(wr->wr.atomic.remote_addr);
1480 				((struct mthca_raddr_seg *) wqe)->rkey =
1481 					cpu_to_be32(wr->wr.atomic.rkey);
1482 				((struct mthca_raddr_seg *) wqe)->reserved = 0;
1483 
1484 				wqe += sizeof (struct mthca_raddr_seg);
1485 
1486 				if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
1487 					((struct mthca_atomic_seg *) wqe)->swap_add =
1488 						cpu_to_be64(wr->wr.atomic.swap);
1489 					((struct mthca_atomic_seg *) wqe)->compare =
1490 						cpu_to_be64(wr->wr.atomic.compare_add);
1491 				} else {
1492 					((struct mthca_atomic_seg *) wqe)->swap_add =
1493 						cpu_to_be64(wr->wr.atomic.compare_add);
1494 					((struct mthca_atomic_seg *) wqe)->compare = 0;
1495 				}
1496 
1497 				wqe += sizeof (struct mthca_atomic_seg);
1498 				size += sizeof (struct mthca_raddr_seg) / 16 +
1499 					sizeof (struct mthca_atomic_seg);
1500 				break;
1501 
1502 			case IB_WR_RDMA_WRITE:
1503 			case IB_WR_RDMA_WRITE_WITH_IMM:
1504 			case IB_WR_RDMA_READ:
1505 				((struct mthca_raddr_seg *) wqe)->raddr =
1506 					cpu_to_be64(wr->wr.rdma.remote_addr);
1507 				((struct mthca_raddr_seg *) wqe)->rkey =
1508 					cpu_to_be32(wr->wr.rdma.rkey);
1509 				((struct mthca_raddr_seg *) wqe)->reserved = 0;
1510 				wqe += sizeof (struct mthca_raddr_seg);
1511 				size += sizeof (struct mthca_raddr_seg) / 16;
1512 				break;
1513 
1514 			default:
1515 				/* No extra segments required for sends */
1516 				break;
1517 			}
1518 
1519 			break;
1520 
1521 		case UD:
1522 			((struct mthca_tavor_ud_seg *) wqe)->lkey =
1523 				cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
1524 			((struct mthca_tavor_ud_seg *) wqe)->av_addr =
1525 				cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
1526 			((struct mthca_tavor_ud_seg *) wqe)->dqpn =
1527 				cpu_to_be32(wr->wr.ud.remote_qpn);
1528 			((struct mthca_tavor_ud_seg *) wqe)->qkey =
1529 				cpu_to_be32(wr->wr.ud.remote_qkey);
1530 
1531 			wqe += sizeof (struct mthca_tavor_ud_seg);
1532 			size += sizeof (struct mthca_tavor_ud_seg) / 16;
1533 			break;
1534 
1535 		case MLX:
1536 			err = build_mlx_header(dev, to_msqp(qp), ind, wr,
1537 					       wqe - sizeof (struct mthca_next_seg),
1538 					       wqe);
1539 			if (err) {
1540 				*bad_wr = wr;
1541 				goto out;
1542 			}
1543 			wqe += sizeof (struct mthca_data_seg);
1544 			size += sizeof (struct mthca_data_seg) / 16;
1545 			break;
1546 		}
1547 
1548 		if (wr->num_sge > qp->sq.max_gs) {
1549 			mthca_err(dev, "too many gathers\n");
1550 			err = -EINVAL;
1551 			*bad_wr = wr;
1552 			goto out;
1553 		}
1554 
1555 		for (i = 0; i < wr->num_sge; ++i) {
1556 			((struct mthca_data_seg *) wqe)->byte_count =
1557 				cpu_to_be32(wr->sg_list[i].length);
1558 			((struct mthca_data_seg *) wqe)->lkey =
1559 				cpu_to_be32(wr->sg_list[i].lkey);
1560 			((struct mthca_data_seg *) wqe)->addr =
1561 				cpu_to_be64(wr->sg_list[i].addr);
1562 			wqe += sizeof (struct mthca_data_seg);
1563 			size += sizeof (struct mthca_data_seg) / 16;
1564 		}
1565 
1566 		/* Add one more inline data segment for ICRC */
1567 		if (qp->transport == MLX) {
1568 			((struct mthca_data_seg *) wqe)->byte_count =
1569 				cpu_to_be32((1 << 31) | 4);
1570 			((u32 *) wqe)[1] = 0;
1571 			wqe += sizeof (struct mthca_data_seg);
1572 			size += sizeof (struct mthca_data_seg) / 16;
1573 		}
1574 
1575 		qp->wrid[ind + qp->rq.max] = wr->wr_id;
1576 
1577 		if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
1578 			mthca_err(dev, "opcode invalid\n");
1579 			err = -EINVAL;
1580 			*bad_wr = wr;
1581 			goto out;
1582 		}
1583 
1584 		if (prev_wqe) {
1585 			((struct mthca_next_seg *) prev_wqe)->nda_op =
1586 				cpu_to_be32(((ind << qp->sq.wqe_shift) +
1587 					     qp->send_wqe_offset) |
1588 					    mthca_opcode[wr->opcode]);
1589 			wmb();
1590 			((struct mthca_next_seg *) prev_wqe)->ee_nds =
1591 				cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size);
1592 		}
1593 
1594 		if (!size0) {
1595 			size0 = size;
1596 			op0   = mthca_opcode[wr->opcode];
1597 		}
1598 
1599 		++ind;
1600 		if (unlikely(ind >= qp->sq.max))
1601 			ind -= qp->sq.max;
1602 	}
1603 
1604 out:
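	/*
	 * Ring the send doorbell once for the whole chain of WQEs:
	 * doorbell[0] carries the offset and opcode of the first new
	 * WQE, doorbell[1] the QP number and that WQE's size in
	 * 16-byte chunks.  The wmb() makes sure the WQEs themselves
	 * are visible to the HCA before the doorbell hits it.
	 */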
1605 	if (likely(nreq)) {
1606 		u32 doorbell[2];
1607 
1608 		doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) +
1609 					   qp->send_wqe_offset) | f0 | op0);
1610 		doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
1611 
1612 		wmb();
1613 
1614 		mthca_write64(doorbell,
1615 			      dev->kar + MTHCA_SEND_DOORBELL,
1616 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1617 	}
1618 
1619 	qp->sq.next_ind = ind;
1620 	qp->sq.head    += nreq;
1621 
1622 	spin_unlock_irqrestore(&qp->sq.lock, flags);
1623 	return err;
1624 }
1625 
1626 int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1627 			     struct ib_recv_wr **bad_wr)
1628 {
1629 	struct mthca_dev *dev = to_mdev(ibqp->device);
1630 	struct mthca_qp *qp = to_mqp(ibqp);
1631 	unsigned long flags;
1632 	int err = 0;
1633 	int nreq;
1634 	int i;
1635 	int size;
1636 	int size0 = 0;
1637 	int ind;
1638 	void *wqe;
1639 	void *prev_wqe;
1640 
1641 	spin_lock_irqsave(&qp->rq.lock, flags);
1642 
1643 	/* XXX check that state is OK to post receive */
1644 
1645 	ind = qp->rq.next_ind;
1646 
1647 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
1648 		if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
1649 			mthca_err(dev, "RQ %06x full (%u head, %u tail,"
1650 					" %d max, %d nreq)\n", qp->qpn,
1651 					qp->rq.head, qp->rq.tail,
1652 					qp->rq.max, nreq);
1653 			err = -ENOMEM;
1654 			*bad_wr = wr;
1655 			goto out;
1656 		}
1657 
1658 		wqe = get_recv_wqe(qp, ind);
1659 		prev_wqe = qp->rq.last;
1660 		qp->rq.last = wqe;
1661 
1662 		((struct mthca_next_seg *) wqe)->nda_op = 0;
1663 		((struct mthca_next_seg *) wqe)->ee_nds =
1664 			cpu_to_be32(MTHCA_NEXT_DBD);
1665 		((struct mthca_next_seg *) wqe)->flags = 0;
1666 
1667 		wqe += sizeof (struct mthca_next_seg);
1668 		size = sizeof (struct mthca_next_seg) / 16;
1669 
1670 		if (unlikely(wr->num_sge > qp->rq.max_gs)) {
1671 			err = -EINVAL;
1672 			*bad_wr = wr;
1673 			goto out;
1674 		}
1675 
1676 		for (i = 0; i < wr->num_sge; ++i) {
1677 			((struct mthca_data_seg *) wqe)->byte_count =
1678 				cpu_to_be32(wr->sg_list[i].length);
1679 			((struct mthca_data_seg *) wqe)->lkey =
1680 				cpu_to_be32(wr->sg_list[i].lkey);
1681 			((struct mthca_data_seg *) wqe)->addr =
1682 				cpu_to_be64(wr->sg_list[i].addr);
1683 			wqe += sizeof (struct mthca_data_seg);
1684 			size += sizeof (struct mthca_data_seg) / 16;
1685 		}
1686 
1687 		qp->wrid[ind] = wr->wr_id;
1688 
1689 		if (likely(prev_wqe)) {
1690 			((struct mthca_next_seg *) prev_wqe)->nda_op =
1691 				cpu_to_be32((ind << qp->rq.wqe_shift) | 1);
1692 			wmb();
1693 			((struct mthca_next_seg *) prev_wqe)->ee_nds =
1694 				cpu_to_be32(MTHCA_NEXT_DBD | size);
1695 		}
1696 
1697 		if (!size0)
1698 			size0 = size;
1699 
1700 		++ind;
1701 		if (unlikely(ind >= qp->rq.max))
1702 			ind -= qp->rq.max;
1703 	}
1704 
1705 out:
1706 	if (likely(nreq)) {
1707 		u32 doorbell[2];
1708 
1709 		doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
1710 		doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq);
1711 
1712 		wmb();
1713 
1714 		mthca_write64(doorbell,
1715 			      dev->kar + MTHCA_RECEIVE_DOORBELL,
1716 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1717 	}
1718 
1719 	qp->rq.next_ind = ind;
1720 	qp->rq.head    += nreq;
1721 
1722 	spin_unlock_irqrestore(&qp->rq.lock, flags);
1723 	return err;
1724 }
1725 
1726 int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1727 			  struct ib_send_wr **bad_wr)
1728 {
1729 	struct mthca_dev *dev = to_mdev(ibqp->device);
1730 	struct mthca_qp *qp = to_mqp(ibqp);
1731 	void *wqe;
1732 	void *prev_wqe;
1733 	unsigned long flags;
1734 	int err = 0;
1735 	int nreq;
1736 	int i;
1737 	int size;
1738 	int size0 = 0;
1739 	u32 f0 = 0;
1740 	int ind;
1741 	u8 op0 = 0;
1742 
1743 	spin_lock_irqsave(&qp->sq.lock, flags);
1744 
1745 	/* XXX check that state is OK to post send */
1746 
1747 	ind = qp->sq.head & (qp->sq.max - 1);
1748 
1749 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
1750 		if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
1751 			mthca_err(dev, "SQ %06x full (%u head, %u tail,"
1752 					" %d max, %d nreq)\n", qp->qpn,
1753 					qp->sq.head, qp->sq.tail,
1754 					qp->sq.max, nreq);
1755 			err = -ENOMEM;
1756 			*bad_wr = wr;
1757 			goto out;
1758 		}
1759 
1760 		wqe = get_send_wqe(qp, ind);
1761 		prev_wqe = qp->sq.last;
1762 		qp->sq.last = wqe;
1763 
1764 		((struct mthca_next_seg *) wqe)->flags =
1765 			((wr->send_flags & IB_SEND_SIGNALED) ?
1766 			 cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
1767 			((wr->send_flags & IB_SEND_SOLICITED) ?
1768 			 cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0)   |
1769 			cpu_to_be32(1);
1770 		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
1771 		    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
1772 			((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
1773 
1774 		wqe += sizeof (struct mthca_next_seg);
1775 		size = sizeof (struct mthca_next_seg) / 16;
1776 
1777 		switch (qp->transport) {
1778 		case UD:
1779 			memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
1780 			       to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
1781 			((struct mthca_arbel_ud_seg *) wqe)->dqpn =
1782 				cpu_to_be32(wr->wr.ud.remote_qpn);
1783 			((struct mthca_arbel_ud_seg *) wqe)->qkey =
1784 				cpu_to_be32(wr->wr.ud.remote_qkey);
1785 
1786 			wqe += sizeof (struct mthca_arbel_ud_seg);
1787 			size += sizeof (struct mthca_arbel_ud_seg) / 16;
1788 			break;
1789 
1790 		case MLX:
1791 			err = build_mlx_header(dev, to_msqp(qp), ind, wr,
1792 					       wqe - sizeof (struct mthca_next_seg),
1793 					       wqe);
1794 			if (err) {
1795 				*bad_wr = wr;
1796 				goto out;
1797 			}
1798 			wqe += sizeof (struct mthca_data_seg);
1799 			size += sizeof (struct mthca_data_seg) / 16;
1800 			break;
1801 		}
1802 
1803 		if (wr->num_sge > qp->sq.max_gs) {
1804 			mthca_err(dev, "too many gathers\n");
1805 			err = -EINVAL;
1806 			*bad_wr = wr;
1807 			goto out;
1808 		}
1809 
1810 		for (i = 0; i < wr->num_sge; ++i) {
1811 			((struct mthca_data_seg *) wqe)->byte_count =
1812 				cpu_to_be32(wr->sg_list[i].length);
1813 			((struct mthca_data_seg *) wqe)->lkey =
1814 				cpu_to_be32(wr->sg_list[i].lkey);
1815 			((struct mthca_data_seg *) wqe)->addr =
1816 				cpu_to_be64(wr->sg_list[i].addr);
1817 			wqe += sizeof (struct mthca_data_seg);
1818 			size += sizeof (struct mthca_data_seg) / 16;
1819 		}
1820 
1821 		/* Add one more inline data segment for ICRC */
1822 		if (qp->transport == MLX) {
1823 			((struct mthca_data_seg *) wqe)->byte_count =
1824 				cpu_to_be32((1 << 31) | 4);
1825 			((u32 *) wqe)[1] = 0;
1826 			wqe += sizeof (struct mthca_data_seg);
1827 			size += sizeof (struct mthca_data_seg) / 16;
1828 		}
1829 
1830 		qp->wrid[ind + qp->rq.max] = wr->wr_id;
1831 
1832 		if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
1833 			mthca_err(dev, "opcode invalid\n");
1834 			err = -EINVAL;
1835 			*bad_wr = wr;
1836 			goto out;
1837 		}
1838 
1839 		if (likely(prev_wqe)) {
1840 			((struct mthca_next_seg *) prev_wqe)->nda_op =
1841 				cpu_to_be32(((ind << qp->sq.wqe_shift) +
1842 					     qp->send_wqe_offset) |
1843 					    mthca_opcode[wr->opcode]);
1844 			wmb();
1845 			((struct mthca_next_seg *) prev_wqe)->ee_nds =
1846 				cpu_to_be32(MTHCA_NEXT_DBD | size);
1847 		}
1848 
1849 		if (!size0) {
1850 			size0 = size;
1851 			op0   = mthca_opcode[wr->opcode];
1852 		}
1853 
1854 		++ind;
1855 		if (unlikely(ind >= qp->sq.max))
1856 			ind -= qp->sq.max;
1857 	}
1858 
1859 out:
1860 	if (likely(nreq)) {
1861 		u32 doorbell[2];
1862 
1863 		doorbell[0] = cpu_to_be32((nreq << 24)                  |
1864 					  ((qp->sq.head & 0xffff) << 8) |
1865 					  f0 | op0);
1866 		doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
1867 
1868 		qp->sq.head += nreq;
1869 
1870 		/*
1871 		 * Make sure that descriptors are written before
1872 		 * doorbell record.
1873 		 */
1874 		wmb();
1875 		*qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);
1876 
1877 		/*
1878 		 * Make sure doorbell record is written before we
1879 		 * write MMIO send doorbell.
1880 		 */
1881 		wmb();
1882 		mthca_write64(doorbell,
1883 			      dev->kar + MTHCA_SEND_DOORBELL,
1884 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1885 	}
1886 
1887 	spin_unlock_irqrestore(&qp->sq.lock, flags);
1888 	return err;
1889 }
1890 
1891 int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1892 			     struct ib_recv_wr **bad_wr)
1893 {
1894 	struct mthca_dev *dev = to_mdev(ibqp->device);
1895 	struct mthca_qp *qp = to_mqp(ibqp);
1896 	unsigned long flags;
1897 	int err = 0;
1898 	int nreq;
1899 	int ind;
1900 	int i;
1901 	void *wqe;
1902 
1903 	spin_lock_irqsave(&qp->rq.lock, flags);
1904 
1905 	/* XXX check that state is OK to post receive */
1906 
1907 	ind = qp->rq.head & (qp->rq.max - 1);
1908 
1909 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
1910 		if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
1911 			mthca_err(dev, "RQ %06x full (%u head, %u tail,"
1912 					" %d max, %d nreq)\n", qp->qpn,
1913 					qp->rq.head, qp->rq.tail,
1914 					qp->rq.max, nreq);
1915 			err = -ENOMEM;
1916 			*bad_wr = wr;
1917 			goto out;
1918 		}
1919 
1920 		wqe = get_recv_wqe(qp, ind);
1921 
1922 		((struct mthca_next_seg *) wqe)->flags = 0;
1923 
1924 		wqe += sizeof (struct mthca_next_seg);
1925 
1926 		if (unlikely(wr->num_sge > qp->rq.max_gs)) {
1927 			err = -EINVAL;
1928 			*bad_wr = wr;
1929 			goto out;
1930 		}
1931 
1932 		for (i = 0; i < wr->num_sge; ++i) {
1933 			((struct mthca_data_seg *) wqe)->byte_count =
1934 				cpu_to_be32(wr->sg_list[i].length);
1935 			((struct mthca_data_seg *) wqe)->lkey =
1936 				cpu_to_be32(wr->sg_list[i].lkey);
1937 			((struct mthca_data_seg *) wqe)->addr =
1938 				cpu_to_be64(wr->sg_list[i].addr);
1939 			wqe += sizeof (struct mthca_data_seg);
1940 		}
1941 
1942 		if (i < qp->rq.max_gs) {
1943 			((struct mthca_data_seg *) wqe)->byte_count = 0;
1944 			((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(0x100);
1945 			((struct mthca_data_seg *) wqe)->addr = 0;
1946 		}
1947 
1948 		qp->wrid[ind] = wr->wr_id;
1949 
1950 		++ind;
1951 		if (unlikely(ind >= qp->rq.max))
1952 			ind -= qp->rq.max;
1953 	}
1954 out:
1955 	if (likely(nreq)) {
1956 		qp->rq.head += nreq;
1957 
1958 		/*
1959 		 * Make sure that descriptors are written before
1960 		 * doorbell record.
1961 		 */
1962 		wmb();
1963 		*qp->rq.db = cpu_to_be32(qp->rq.head & 0xffff);
1964 	}
1965 
1966 	spin_unlock_irqrestore(&qp->rq.lock, flags);
1967 	return err;
1968 }
1969 
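/*
 * Presumably used by the CQ error-handling path: for a WQE that
 * completed in error, report its DBD bit (always counted as set on
 * mem-free HCAs) and, if it chained to another WQE, where that next
 * WQE lives.
 */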
1970 int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
1971 		       int index, int *dbd, u32 *new_wqe)
1972 {
1973 	struct mthca_next_seg *next;
1974 
1975 	if (is_send)
1976 		next = get_send_wqe(qp, index);
1977 	else
1978 		next = get_recv_wqe(qp, index);
1979 
1980 	if (dev->hca_type == ARBEL_NATIVE)
1981 		*dbd = 1;
1982 	else
1983 		*dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD));
1984 	if (next->ee_nds & cpu_to_be32(0x3f))
1985 		*new_wqe = (next->nda_op & cpu_to_be32(~0x3f)) |
1986 			(next->ee_nds & cpu_to_be32(0x3f));
1987 	else
1988 		*new_wqe = 0;
1989 
1990 	return 0;
1991 }
1992 
1993 int __devinit mthca_init_qp_table(struct mthca_dev *dev)
1994 {
1995 	int err;
1996 	u8 status;
1997 	int i;
1998 
1999 	spin_lock_init(&dev->qp_table.lock);
2000 
2001 	/*
2002 	 * We reserve 2 extra QPs per port for the special QPs.  The
2003 	 * special QP for port 1 has to be even, so round up.
2004 	 */
2005 	dev->qp_table.sqp_start = (dev->limits.reserved_qps + 1) & ~1UL;
2006 	err = mthca_alloc_init(&dev->qp_table.alloc,
2007 			       dev->limits.num_qps,
2008 			       (1 << 24) - 1,
2009 			       dev->qp_table.sqp_start +
2010 			       MTHCA_MAX_PORTS * 2);
2011 	if (err)
2012 		return err;
2013 
2014 	err = mthca_array_init(&dev->qp_table.qp,
2015 			       dev->limits.num_qps);
2016 	if (err) {
2017 		mthca_alloc_cleanup(&dev->qp_table.alloc);
2018 		return err;
2019 	}
2020 
2021 	for (i = 0; i < 2; ++i) {
2022 		err = mthca_CONF_SPECIAL_QP(dev, i ? IB_QPT_GSI : IB_QPT_SMI,
2023 					    dev->qp_table.sqp_start + i * 2,
2024 					    &status);
2025 		if (err)
2026 			goto err_out;
2027 		if (status) {
2028 			mthca_warn(dev, "CONF_SPECIAL_QP returned "
2029 				   "status %02x, aborting.\n",
2030 				   status);
2031 			err = -EINVAL;
2032 			goto err_out;
2033 		}
2034 	}
2035 	return 0;
2036 
2037  err_out:
2038 	for (i = 0; i < 2; ++i)
2039 		mthca_CONF_SPECIAL_QP(dev, i, 0, &status);
2040 
2041 	mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
2042 	mthca_alloc_cleanup(&dev->qp_table.alloc);
2043 
2044 	return err;
2045 }
2046 
2047 void __devexit mthca_cleanup_qp_table(struct mthca_dev *dev)
2048 {
2049 	int i;
2050 	u8 status;
2051 
2052 	for (i = 0; i < 2; ++i)
2053 		mthca_CONF_SPECIAL_QP(dev, i, 0, &status);
2054 
2055 	mthca_alloc_cleanup(&dev->qp_table.alloc);
2056 }
2057