1 /*
2  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #include <linux/errno.h>
35 #include <linux/interrupt.h>
36 #include <linux/pci.h>
37 
38 #include "mthca_dev.h"
39 #include "mthca_cmd.h"
40 #include "mthca_config_reg.h"
41 
/*
 * EQ sizing constants.
 *
 * mthca_init_eq_table() requests MTHCA_NUM_ASYNC_EQE (async EQ) and
 * MTHCA_NUM_CMD_EQE (command EQ) entries, each plus
 * MTHCA_NUM_SPARE_EQE extra entries.  mthca_eq_int() pushes the
 * consumer index to the HCA at least once every MTHCA_NUM_SPARE_EQE
 * processed entries.
 */
enum {
	MTHCA_NUM_ASYNC_EQE = 0x80,
	MTHCA_NUM_CMD_EQE   = 0x80,
	MTHCA_NUM_SPARE_EQE = 0x80,
	/* Size in bytes of one EQ entry; get_eqe() uses it as the stride. */
	MTHCA_EQ_ENTRY_SIZE = 0x20
};
48 
/*
 * EQ context that mthca_create_eq() builds in a command mailbox and
 * hands to the HCA with SW2HW_EQ.  Multi-byte fields are stored
 * big-endian (see the cpu_to_be32() conversions in mthca_create_eq()).
 *
 * Must be packed because start is 64 bits but only aligned to 32 bits.
 */
struct mthca_eq_context {
	__be32 flags;		/* OR of the MTHCA_EQ_STATUS/OWNER/FLAG/STATE values */
	__be64 start;		/* not written by this file (left zero by memset) */
	__be32 logsize_usrpage;	/* log2(nent) << 24, plus the UAR index on Tavor */
	__be32 tavor_pd;	/* reserved for Arbel */
	u8     reserved1[3];
	u8     intr;		/* interrupt value passed in by mthca_create_eq()'s caller */
	__be32 arbel_pd;	/* lost_count for Tavor */
	__be32 lkey;		/* lkey of the MR registered over the EQ pages */
	u32    reserved2[2];
	__be32 consumer_index;
	__be32 producer_index;
	u32    reserved3[4];
} __attribute__((packed));
66 
/*
 * Bit fields that make up mthca_eq_context.flags.
 *
 * The status field sits in the top four bits of the word.  Its values are
 * written as unsigned constants because 9 << 28 and 10 << 28 do not fit in
 * a signed int, and left-shifting a signed value past its range is
 * undefined behaviour in C.
 */
#define MTHCA_EQ_STATUS_OK          ( 0U << 28)
#define MTHCA_EQ_STATUS_OVERFLOW    ( 9U << 28)
#define MTHCA_EQ_STATUS_WRITE_FAIL  (10U << 28)
#define MTHCA_EQ_OWNER_SW           ( 0 << 24)
#define MTHCA_EQ_OWNER_HW           ( 1 << 24)
#define MTHCA_EQ_FLAG_TR            ( 1 << 18)
#define MTHCA_EQ_FLAG_OI            ( 1 << 17)
#define MTHCA_EQ_STATE_ARMED        ( 1 <<  8)
#define MTHCA_EQ_STATE_FIRED        ( 2 <<  8)
#define MTHCA_EQ_STATE_ALWAYS_ARMED ( 3 <<  8)
#define MTHCA_EQ_STATE_ARBEL        ( 8 <<  8)
78 
/*
 * Event type codes carried in mthca_eqe.type and dispatched by
 * mthca_eq_int().  The same values are used as bit positions when the
 * MTHCA_*_EVENT_MASK macros are built, so they are listed by category
 * rather than in numeric order.
 */
enum {
	MTHCA_EVENT_TYPE_COMP       	    = 0x00,
	MTHCA_EVENT_TYPE_PATH_MIG   	    = 0x01,
	MTHCA_EVENT_TYPE_COMM_EST   	    = 0x02,
	MTHCA_EVENT_TYPE_SQ_DRAINED 	    = 0x03,
	MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE    = 0x13,
	MTHCA_EVENT_TYPE_SRQ_LIMIT	    = 0x14,
	MTHCA_EVENT_TYPE_CQ_ERROR   	    = 0x04,
	MTHCA_EVENT_TYPE_WQ_CATAS_ERROR     = 0x05,
	MTHCA_EVENT_TYPE_EEC_CATAS_ERROR    = 0x06,
	MTHCA_EVENT_TYPE_PATH_MIG_FAILED    = 0x07,
	MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10,
	MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR    = 0x11,
	MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR    = 0x12,
	MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR  = 0x08,
	MTHCA_EVENT_TYPE_PORT_CHANGE        = 0x09,
	MTHCA_EVENT_TYPE_EQ_OVERFLOW        = 0x0f,
	MTHCA_EVENT_TYPE_ECC_DETECT         = 0x0e,
	MTHCA_EVENT_TYPE_CMD                = 0x0a
};
99 
/*
 * 64-bit event masks in which bit N selects event type N.  They are the
 * masks passed to mthca_MAP_EQ() by mthca_init_eq_table() and
 * mthca_cleanup_eq_table().
 *
 * MTHCA_ASYNC_EVENT_MASK: events always routed to the async EQ.
 */
#define MTHCA_ASYNC_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_PATH_MIG)           | \
				(1ULL << MTHCA_EVENT_TYPE_COMM_EST)           | \
				(1ULL << MTHCA_EVENT_TYPE_SQ_DRAINED)         | \
				(1ULL << MTHCA_EVENT_TYPE_CQ_ERROR)           | \
				(1ULL << MTHCA_EVENT_TYPE_WQ_CATAS_ERROR)     | \
				(1ULL << MTHCA_EVENT_TYPE_EEC_CATAS_ERROR)    | \
				(1ULL << MTHCA_EVENT_TYPE_PATH_MIG_FAILED)    | \
				(1ULL << MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
				(1ULL << MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
				(1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR)  | \
				(1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE)        | \
				(1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
/* Extra async events, added by async_mask() when MTHCA_FLAG_SRQ is set. */
#define MTHCA_SRQ_EVENT_MASK   ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
				(1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE)    | \
				(1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT))
/* The command-completion event, routed to the command EQ. */
#define MTHCA_CMD_EVENT_MASK    (1ULL << MTHCA_EVENT_TYPE_CMD)
116 
/*
 * Command codes for the EQ doorbell.  The helpers in this file OR one of
 * these with the EQ number to form the first word written through
 * mthca_write64() to MTHCA_EQ_DOORBELL (see tavor_set_eq_ci(),
 * tavor_eq_req_not() and disarm_cq()).
 */
#define MTHCA_EQ_DB_INC_CI     (1 << 24)
#define MTHCA_EQ_DB_REQ_NOT    (2 << 24)
#define MTHCA_EQ_DB_DISARM_CQ  (3 << 24)
#define MTHCA_EQ_DB_SET_CI     (4 << 24)
#define MTHCA_EQ_DB_ALWAYS_ARM (5 << 24)
122 
/*
 * One event queue entry.  The packed layout adds up to 32 bytes
 * (4 header bytes, a 24-byte event union, 4 trailer bytes), which
 * matches MTHCA_EQ_ENTRY_SIZE.  Which member of the event union is
 * valid depends on type; see the switch in mthca_eq_int().
 */
struct mthca_eqe {
	u8 reserved1;
	u8 type;		/* one of MTHCA_EVENT_TYPE_* */
	u8 reserved2;
	u8 subtype;		/* mthca_eq_int() reads this for port change events */
	union {
		u32 raw[6];
		/* MTHCA_EVENT_TYPE_COMP */
		struct {
			__be32 cqn;
		} __attribute__((packed)) comp;
		/* MTHCA_EVENT_TYPE_CMD: forwarded to mthca_cmd_event() */
		struct {
			u16    reserved1;
			__be16 token;
			u32    reserved2;
			u8     reserved3[3];
			u8     status;
			__be64 out_param;
		} __attribute__((packed)) cmd;
		/* QP-related events; only the low 24 bits are used as the QPN */
		struct {
			__be32 qpn;
		} __attribute__((packed)) qp;
		/* SRQ-related events; only the low 24 bits are used as the SRQN */
		struct {
			__be32 srqn;
		} __attribute__((packed)) srq;
		/* MTHCA_EVENT_TYPE_CQ_ERROR; syndrome 1 is reported as "overrun" */
		struct {
			__be32 cqn;
			u32    reserved1;
			u8     reserved2[3];
			u8     syndrome;
		} __attribute__((packed)) cq_err;
		/* MTHCA_EVENT_TYPE_PORT_CHANGE; port number is in bits 29:28 */
		struct {
			u32    reserved1[2];
			__be32 port;
		} __attribute__((packed)) port_change;
	} event;
	u8 reserved3[3];
	u8 owner;		/* MTHCA_EQ_ENTRY_OWNER_SW or MTHCA_EQ_ENTRY_OWNER_HW */
} __attribute__((packed));
161 
/*
 * Values of mthca_eqe.owner.  next_eqe_sw() treats an entry as ready for
 * software when the MTHCA_EQ_ENTRY_OWNER_HW bit is clear; set_eqe_hw()
 * hands an entry back by storing MTHCA_EQ_ENTRY_OWNER_HW.
 */
#define  MTHCA_EQ_ENTRY_OWNER_SW      (0 << 7)
#define  MTHCA_EQ_ENTRY_OWNER_HW      (1 << 7)
164 
165 static inline u64 async_mask(struct mthca_dev *dev)
166 {
167 	return dev->mthca_flags & MTHCA_FLAG_SRQ ?
168 		MTHCA_ASYNC_EVENT_MASK | MTHCA_SRQ_EVENT_MASK :
169 		MTHCA_ASYNC_EVENT_MASK;
170 }
171 
172 static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
173 {
174 	/*
175 	 * This barrier makes sure that all updates to ownership bits
176 	 * done by set_eqe_hw() hit memory before the consumer index
177 	 * is updated.  set_eq_ci() allows the HCA to possibly write
178 	 * more EQ entries, and we want to avoid the exceedingly
179 	 * unlikely possibility of the HCA writing an entry and then
180 	 * having set_eqe_hw() overwrite the owner field.
181 	 */
182 	wmb();
183 	mthca_write64(MTHCA_EQ_DB_SET_CI | eq->eqn, ci & (eq->nent - 1),
184 		      dev->kar + MTHCA_EQ_DOORBELL,
185 		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
186 }
187 
188 static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
189 {
190 	/* See comment in tavor_set_eq_ci() above. */
191 	wmb();
192 	__raw_writel((__force u32) cpu_to_be32(ci),
193 		     dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8);
194 	/* We still want ordering, just not swabbing, so add a barrier */
195 	mb();
196 }
197 
198 static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
199 {
200 	if (mthca_is_memfree(dev))
201 		arbel_set_eq_ci(dev, eq, ci);
202 	else
203 		tavor_set_eq_ci(dev, eq, ci);
204 }
205 
206 static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn)
207 {
208 	mthca_write64(MTHCA_EQ_DB_REQ_NOT | eqn, 0,
209 		      dev->kar + MTHCA_EQ_DOORBELL,
210 		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
211 }
212 
213 static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask)
214 {
215 	writel(eqn_mask, dev->eq_regs.arbel.eq_arm);
216 }
217 
218 static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
219 {
220 	if (!mthca_is_memfree(dev)) {
221 		mthca_write64(MTHCA_EQ_DB_DISARM_CQ | eqn, cqn,
222 			      dev->kar + MTHCA_EQ_DOORBELL,
223 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
224 	}
225 }
226 
227 static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry)
228 {
229 	unsigned long off = (entry & (eq->nent - 1)) * MTHCA_EQ_ENTRY_SIZE;
230 	return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
231 }
232 
233 static inline struct mthca_eqe *next_eqe_sw(struct mthca_eq *eq)
234 {
235 	struct mthca_eqe *eqe;
236 	eqe = get_eqe(eq, eq->cons_index);
237 	return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? NULL : eqe;
238 }
239 
240 static inline void set_eqe_hw(struct mthca_eqe *eqe)
241 {
242 	eqe->owner =  MTHCA_EQ_ENTRY_OWNER_HW;
243 }
244 
245 static void port_change(struct mthca_dev *dev, int port, int active)
246 {
247 	struct ib_event record;
248 
249 	mthca_dbg(dev, "Port change to %s for port %d\n",
250 		  active ? "active" : "down", port);
251 
252 	record.device = &dev->ib_dev;
253 	record.event  = active ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
254 	record.element.port_num = port;
255 
256 	ib_dispatch_event(&record);
257 }
258 
259 static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
260 {
261 	struct mthca_eqe *eqe;
262 	int disarm_cqn;
263 	int eqes_found = 0;
264 	int set_ci = 0;
265 
266 	while ((eqe = next_eqe_sw(eq))) {
267 		/*
268 		 * Make sure we read EQ entry contents after we've
269 		 * checked the ownership bit.
270 		 */
271 		rmb();
272 
273 		switch (eqe->type) {
274 		case MTHCA_EVENT_TYPE_COMP:
275 			disarm_cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff;
276 			disarm_cq(dev, eq->eqn, disarm_cqn);
277 			mthca_cq_completion(dev, disarm_cqn);
278 			break;
279 
280 		case MTHCA_EVENT_TYPE_PATH_MIG:
281 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
282 				       IB_EVENT_PATH_MIG);
283 			break;
284 
285 		case MTHCA_EVENT_TYPE_COMM_EST:
286 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
287 				       IB_EVENT_COMM_EST);
288 			break;
289 
290 		case MTHCA_EVENT_TYPE_SQ_DRAINED:
291 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
292 				       IB_EVENT_SQ_DRAINED);
293 			break;
294 
295 		case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE:
296 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
297 				       IB_EVENT_QP_LAST_WQE_REACHED);
298 			break;
299 
300 		case MTHCA_EVENT_TYPE_SRQ_LIMIT:
301 			mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff,
302 					IB_EVENT_SRQ_LIMIT_REACHED);
303 			break;
304 
305 		case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR:
306 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
307 				       IB_EVENT_QP_FATAL);
308 			break;
309 
310 		case MTHCA_EVENT_TYPE_PATH_MIG_FAILED:
311 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
312 				       IB_EVENT_PATH_MIG_ERR);
313 			break;
314 
315 		case MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
316 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
317 				       IB_EVENT_QP_REQ_ERR);
318 			break;
319 
320 		case MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR:
321 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
322 				       IB_EVENT_QP_ACCESS_ERR);
323 			break;
324 
325 		case MTHCA_EVENT_TYPE_CMD:
326 			mthca_cmd_event(dev,
327 					be16_to_cpu(eqe->event.cmd.token),
328 					eqe->event.cmd.status,
329 					be64_to_cpu(eqe->event.cmd.out_param));
330 			break;
331 
332 		case MTHCA_EVENT_TYPE_PORT_CHANGE:
333 			port_change(dev,
334 				    (be32_to_cpu(eqe->event.port_change.port) >> 28) & 3,
335 				    eqe->subtype == 0x4);
336 			break;
337 
338 		case MTHCA_EVENT_TYPE_CQ_ERROR:
339 			mthca_warn(dev, "CQ %s on CQN %06x\n",
340 				   eqe->event.cq_err.syndrome == 1 ?
341 				   "overrun" : "access violation",
342 				   be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
343 			mthca_cq_event(dev, be32_to_cpu(eqe->event.cq_err.cqn),
344 				       IB_EVENT_CQ_ERR);
345 			break;
346 
347 		case MTHCA_EVENT_TYPE_EQ_OVERFLOW:
348 			mthca_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
349 			break;
350 
351 		case MTHCA_EVENT_TYPE_EEC_CATAS_ERROR:
352 		case MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR:
353 		case MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR:
354 		case MTHCA_EVENT_TYPE_ECC_DETECT:
355 		default:
356 			mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n",
357 				   eqe->type, eqe->subtype, eq->eqn);
358 			break;
359 		};
360 
361 		set_eqe_hw(eqe);
362 		++eq->cons_index;
363 		eqes_found = 1;
364 		++set_ci;
365 
366 		/*
367 		 * The HCA will think the queue has overflowed if we
368 		 * don't tell it we've been processing events.  We
369 		 * create our EQs with MTHCA_NUM_SPARE_EQE extra
370 		 * entries, so we must update our consumer index at
371 		 * least that often.
372 		 */
373 		if (unlikely(set_ci >= MTHCA_NUM_SPARE_EQE)) {
374 			/*
375 			 * Conditional on hca_type is OK here because
376 			 * this is a rare case, not the fast path.
377 			 */
378 			set_eq_ci(dev, eq, eq->cons_index);
379 			set_ci = 0;
380 		}
381 	}
382 
383 	/*
384 	 * Rely on caller to set consumer index so that we don't have
385 	 * to test hca_type in our interrupt handling fast path.
386 	 */
387 	return eqes_found;
388 }
389 
390 static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr)
391 {
392 	struct mthca_dev *dev = dev_ptr;
393 	u32 ecr;
394 	int i;
395 
396 	if (dev->eq_table.clr_mask)
397 		writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
398 
399 	ecr = readl(dev->eq_regs.tavor.ecr_base + 4);
400 	if (!ecr)
401 		return IRQ_NONE;
402 
403 	writel(ecr, dev->eq_regs.tavor.ecr_base +
404 	       MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
405 
406 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
407 		if (ecr & dev->eq_table.eq[i].eqn_mask) {
408 			if (mthca_eq_int(dev, &dev->eq_table.eq[i]))
409 				tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
410 						dev->eq_table.eq[i].cons_index);
411 			tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
412 		}
413 
414 	return IRQ_HANDLED;
415 }
416 
417 static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr)
418 {
419 	struct mthca_eq  *eq  = eq_ptr;
420 	struct mthca_dev *dev = eq->dev;
421 
422 	mthca_eq_int(dev, eq);
423 	tavor_set_eq_ci(dev, eq, eq->cons_index);
424 	tavor_eq_req_not(dev, eq->eqn);
425 
426 	/* MSI-X vectors always belong to us */
427 	return IRQ_HANDLED;
428 }
429 
430 static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr)
431 {
432 	struct mthca_dev *dev = dev_ptr;
433 	int work = 0;
434 	int i;
435 
436 	if (dev->eq_table.clr_mask)
437 		writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
438 
439 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
440 		if (mthca_eq_int(dev, &dev->eq_table.eq[i])) {
441 			work = 1;
442 			arbel_set_eq_ci(dev, &dev->eq_table.eq[i],
443 					dev->eq_table.eq[i].cons_index);
444 		}
445 
446 	arbel_eq_req_not(dev, dev->eq_table.arm_mask);
447 
448 	return IRQ_RETVAL(work);
449 }
450 
451 static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr)
452 {
453 	struct mthca_eq  *eq  = eq_ptr;
454 	struct mthca_dev *dev = eq->dev;
455 
456 	mthca_eq_int(dev, eq);
457 	arbel_set_eq_ci(dev, eq, eq->cons_index);
458 	arbel_eq_req_not(dev, eq->eqn_mask);
459 
460 	/* MSI-X vectors always belong to us */
461 	return IRQ_HANDLED;
462 }
463 
/*
 * Allocate and start one event queue.
 *
 * @dev:  device the EQ belongs to
 * @nent: requested number of entries; rounded up to a power of two,
 *        with a minimum of 2
 * @intr: value stored in the EQ context's intr field
 * @eq:   EQ structure to fill in
 *
 * The queue memory is allocated one coherent DMA page at a time, every
 * entry is initialised as hardware-owned, an EQ number is taken from the
 * EQ table allocator, a memory region is registered over the pages, and
 * the EQ context is passed to the HCA with SW2HW_EQ.
 *
 * Returns 0 on success or a negative errno.  On failure everything
 * acquired so far is released in reverse order by the labels at the
 * end of the function.
 */
static int mthca_create_eq(struct mthca_dev *dev,
			   int nent,
			   u8 intr,
			   struct mthca_eq *eq)
{
	int npages;
	u64 *dma_list = NULL;
	dma_addr_t t;
	struct mthca_mailbox *mailbox;
	struct mthca_eq_context *eq_context;
	/* Default error for the allocation failures below that do not set err. */
	int err = -ENOMEM;
	int i;
	u8 status;

	eq->dev  = dev;
	eq->nent = roundup_pow_of_two(max(nent, 2));
	npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE;

	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
				GFP_KERNEL);
	if (!eq->page_list)
		goto err_out;

	/*
	 * Clear all buffer pointers up front so the error path can tell
	 * which pages were actually allocated.
	 */
	for (i = 0; i < npages; ++i)
		eq->page_list[i].buf = NULL;

	dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
	if (!dma_list)
		goto err_out_free;

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox))
		goto err_out_free;
	eq_context = mailbox->buf;

	for (i = 0; i < npages; ++i) {
		eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
							  PAGE_SIZE, &t, GFP_KERNEL);
		if (!eq->page_list[i].buf)
			goto err_out_free_pages;

		dma_list[i] = t;
		pci_unmap_addr_set(&eq->page_list[i], mapping, t);

		clear_page(eq->page_list[i].buf);
	}

	/* Every entry starts out owned by the hardware. */
	for (i = 0; i < eq->nent; ++i)
		set_eqe_hw(get_eqe(eq, i));

	eq->eqn = mthca_alloc(&dev->eq_table.alloc);
	if (eq->eqn == -1)
		goto err_out_free_pages;

	/* Register the EQ pages so the context can refer to them by lkey. */
	err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
				  dma_list, PAGE_SHIFT, npages,
				  0, npages * PAGE_SIZE,
				  MTHCA_MPT_FLAG_LOCAL_WRITE |
				  MTHCA_MPT_FLAG_LOCAL_READ,
				  &eq->mr);
	if (err)
		goto err_out_free_eq;

	memset(eq_context, 0, sizeof *eq_context);
	eq_context->flags           = cpu_to_be32(MTHCA_EQ_STATUS_OK   |
						  MTHCA_EQ_OWNER_HW    |
						  MTHCA_EQ_STATE_ARMED |
						  MTHCA_EQ_FLAG_TR);
	if (mthca_is_memfree(dev))
		eq_context->flags  |= cpu_to_be32(MTHCA_EQ_STATE_ARBEL);

	/* nent is a power of two, so ffs(nent) - 1 is log2(nent). */
	eq_context->logsize_usrpage = cpu_to_be32((ffs(eq->nent) - 1) << 24);
	if (mthca_is_memfree(dev)) {
		eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num);
	} else {
		eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
		eq_context->tavor_pd         = cpu_to_be32(dev->driver_pd.pd_num);
	}
	eq_context->intr            = intr;
	eq_context->lkey            = cpu_to_be32(eq->mr.ibmr.lkey);

	err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status);
	if (err) {
		mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err);
		goto err_out_free_mr;
	}
	if (status) {
		mthca_warn(dev, "SW2HW_EQ returned status 0x%02x\n",
			   status);
		err = -EINVAL;
		goto err_out_free_mr;
	}

	/* The DMA address list and the mailbox are only needed during setup. */
	kfree(dma_list);
	mthca_free_mailbox(dev, mailbox);

	/* Byte-swapped single-bit mask for this EQ, used by the interrupt handlers. */
	eq->eqn_mask   = swab32(1 << eq->eqn);
	eq->cons_index = 0;

	dev->eq_table.arm_mask |= eq->eqn_mask;

	mthca_dbg(dev, "Allocated EQ %d with %d entries\n",
		  eq->eqn, eq->nent);

	/* err is 0 here: it holds the result of the successful SW2HW_EQ call. */
	return err;

 err_out_free_mr:
	mthca_free_mr(dev, &eq->mr);

 err_out_free_eq:
	mthca_free(&dev->eq_table.alloc, eq->eqn);

 err_out_free_pages:
	/* Pages that were never allocated still have a NULL buf. */
	for (i = 0; i < npages; ++i)
		if (eq->page_list[i].buf)
			dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
					  eq->page_list[i].buf,
					  pci_unmap_addr(&eq->page_list[i],
							 mapping));

	mthca_free_mailbox(dev, mailbox);

 err_out_free:
	kfree(eq->page_list);
	kfree(dma_list);

 err_out:
	return err;
}
593 
594 static void mthca_free_eq(struct mthca_dev *dev,
595 			  struct mthca_eq *eq)
596 {
597 	struct mthca_mailbox *mailbox;
598 	int err;
599 	u8 status;
600 	int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
601 		PAGE_SIZE;
602 	int i;
603 
604 	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
605 	if (IS_ERR(mailbox))
606 		return;
607 
608 	err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status);
609 	if (err)
610 		mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err);
611 	if (status)
612 		mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", status);
613 
614 	dev->eq_table.arm_mask &= ~eq->eqn_mask;
615 
616 	if (0) {
617 		mthca_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn);
618 		for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) {
619 			if (i % 4 == 0)
620 				printk("[%02x] ", i * 4);
621 			printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
622 			if ((i + 1) % 4 == 0)
623 				printk("\n");
624 		}
625 	}
626 
627 	mthca_free_mr(dev, &eq->mr);
628 	for (i = 0; i < npages; ++i)
629 		pci_free_consistent(dev->pdev, PAGE_SIZE,
630 				    eq->page_list[i].buf,
631 				    pci_unmap_addr(&eq->page_list[i], mapping));
632 
633 	kfree(eq->page_list);
634 	mthca_free_mailbox(dev, mailbox);
635 }
636 
637 static void mthca_free_irqs(struct mthca_dev *dev)
638 {
639 	int i;
640 
641 	if (dev->eq_table.have_irq)
642 		free_irq(dev->pdev->irq, dev);
643 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
644 		if (dev->eq_table.eq[i].have_irq) {
645 			free_irq(dev->eq_table.eq[i].msi_x_vector,
646 				 dev->eq_table.eq + i);
647 			dev->eq_table.eq[i].have_irq = 0;
648 		}
649 }
650 
651 static int mthca_map_reg(struct mthca_dev *dev,
652 			 unsigned long offset, unsigned long size,
653 			 void __iomem **map)
654 {
655 	unsigned long base = pci_resource_start(dev->pdev, 0);
656 
657 	*map = ioremap(base + offset, size);
658 	if (!*map)
659 		return -ENOMEM;
660 
661 	return 0;
662 }
663 
664 static int mthca_map_eq_regs(struct mthca_dev *dev)
665 {
666 	if (mthca_is_memfree(dev)) {
667 		/*
668 		 * We assume that the EQ arm and EQ set CI registers
669 		 * fall within the first BAR.  We can't trust the
670 		 * values firmware gives us, since those addresses are
671 		 * valid on the HCA's side of the PCI bus but not
672 		 * necessarily the host side.
673 		 */
674 		if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
675 				  dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
676 				  &dev->clr_base)) {
677 			mthca_err(dev, "Couldn't map interrupt clear register, "
678 				  "aborting.\n");
679 			return -ENOMEM;
680 		}
681 
682 		/*
683 		 * Add 4 because we limit ourselves to EQs 0 ... 31,
684 		 * so we only need the low word of the register.
685 		 */
686 		if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
687 					dev->fw.arbel.eq_arm_base) + 4, 4,
688 				  &dev->eq_regs.arbel.eq_arm)) {
689 			mthca_err(dev, "Couldn't map EQ arm register, aborting.\n");
690 			iounmap(dev->clr_base);
691 			return -ENOMEM;
692 		}
693 
694 		if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
695 				  dev->fw.arbel.eq_set_ci_base,
696 				  MTHCA_EQ_SET_CI_SIZE,
697 				  &dev->eq_regs.arbel.eq_set_ci_base)) {
698 			mthca_err(dev, "Couldn't map EQ CI register, aborting.\n");
699 			iounmap(dev->eq_regs.arbel.eq_arm);
700 			iounmap(dev->clr_base);
701 			return -ENOMEM;
702 		}
703 	} else {
704 		if (mthca_map_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
705 				  &dev->clr_base)) {
706 			mthca_err(dev, "Couldn't map interrupt clear register, "
707 				  "aborting.\n");
708 			return -ENOMEM;
709 		}
710 
711 		if (mthca_map_reg(dev, MTHCA_ECR_BASE,
712 				  MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
713 				  &dev->eq_regs.tavor.ecr_base)) {
714 			mthca_err(dev, "Couldn't map ecr register, "
715 				  "aborting.\n");
716 			iounmap(dev->clr_base);
717 			return -ENOMEM;
718 		}
719 	}
720 
721 	return 0;
722 
723 }
724 
725 static void mthca_unmap_eq_regs(struct mthca_dev *dev)
726 {
727 	if (mthca_is_memfree(dev)) {
728 		iounmap(dev->eq_regs.arbel.eq_set_ci_base);
729 		iounmap(dev->eq_regs.arbel.eq_arm);
730 		iounmap(dev->clr_base);
731 	} else {
732 		iounmap(dev->eq_regs.tavor.ecr_base);
733 		iounmap(dev->clr_base);
734 	}
735 }
736 
737 int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
738 {
739 	int ret;
740 	u8 status;
741 
742 	/*
743 	 * We assume that mapping one page is enough for the whole EQ
744 	 * context table.  This is fine with all current HCAs, because
745 	 * we only use 32 EQs and each EQ uses 32 bytes of context
746 	 * memory, or 1 KB total.
747 	 */
748 	dev->eq_table.icm_virt = icm_virt;
749 	dev->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
750 	if (!dev->eq_table.icm_page)
751 		return -ENOMEM;
752 	dev->eq_table.icm_dma  = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0,
753 					      PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
754 	if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) {
755 		__free_page(dev->eq_table.icm_page);
756 		return -ENOMEM;
757 	}
758 
759 	ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt, &status);
760 	if (!ret && status)
761 		ret = -EINVAL;
762 	if (ret) {
763 		pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
764 			       PCI_DMA_BIDIRECTIONAL);
765 		__free_page(dev->eq_table.icm_page);
766 	}
767 
768 	return ret;
769 }
770 
771 void mthca_unmap_eq_icm(struct mthca_dev *dev)
772 {
773 	u8 status;
774 
775 	mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1, &status);
776 	pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
777 		       PCI_DMA_BIDIRECTIONAL);
778 	__free_page(dev->eq_table.icm_page);
779 }
780 
781 int mthca_init_eq_table(struct mthca_dev *dev)
782 {
783 	int err;
784 	u8 status;
785 	u8 intr;
786 	int i;
787 
788 	err = mthca_alloc_init(&dev->eq_table.alloc,
789 			       dev->limits.num_eqs,
790 			       dev->limits.num_eqs - 1,
791 			       dev->limits.reserved_eqs);
792 	if (err)
793 		return err;
794 
795 	err = mthca_map_eq_regs(dev);
796 	if (err)
797 		goto err_out_free;
798 
799 	if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
800 		dev->eq_table.clr_mask = 0;
801 	} else {
802 		dev->eq_table.clr_mask =
803 			swab32(1 << (dev->eq_table.inta_pin & 31));
804 		dev->eq_table.clr_int  = dev->clr_base +
805 			(dev->eq_table.inta_pin < 32 ? 4 : 0);
806 	}
807 
808 	dev->eq_table.arm_mask = 0;
809 
810 	intr = dev->eq_table.inta_pin;
811 
812 	err = mthca_create_eq(dev, dev->limits.num_cqs + MTHCA_NUM_SPARE_EQE,
813 			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr,
814 			      &dev->eq_table.eq[MTHCA_EQ_COMP]);
815 	if (err)
816 		goto err_out_unmap;
817 
818 	err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE + MTHCA_NUM_SPARE_EQE,
819 			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr,
820 			      &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
821 	if (err)
822 		goto err_out_comp;
823 
824 	err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE + MTHCA_NUM_SPARE_EQE,
825 			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 130 : intr,
826 			      &dev->eq_table.eq[MTHCA_EQ_CMD]);
827 	if (err)
828 		goto err_out_async;
829 
830 	if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
831 		static const char *eq_name[] = {
832 			[MTHCA_EQ_COMP]  = DRV_NAME "-comp",
833 			[MTHCA_EQ_ASYNC] = DRV_NAME "-async",
834 			[MTHCA_EQ_CMD]   = DRV_NAME "-cmd"
835 		};
836 
837 		for (i = 0; i < MTHCA_NUM_EQ; ++i) {
838 			snprintf(dev->eq_table.eq[i].irq_name,
839 				 IB_DEVICE_NAME_MAX,
840 				 "%s@pci:%s", eq_name[i],
841 				 pci_name(dev->pdev));
842 			err = request_irq(dev->eq_table.eq[i].msi_x_vector,
843 					  mthca_is_memfree(dev) ?
844 					  mthca_arbel_msi_x_interrupt :
845 					  mthca_tavor_msi_x_interrupt,
846 					  0, dev->eq_table.eq[i].irq_name,
847 					  dev->eq_table.eq + i);
848 			if (err)
849 				goto err_out_cmd;
850 			dev->eq_table.eq[i].have_irq = 1;
851 		}
852 	} else {
853 		snprintf(dev->eq_table.eq[0].irq_name, IB_DEVICE_NAME_MAX,
854 			 DRV_NAME "@pci:%s", pci_name(dev->pdev));
855 		err = request_irq(dev->pdev->irq,
856 				  mthca_is_memfree(dev) ?
857 				  mthca_arbel_interrupt :
858 				  mthca_tavor_interrupt,
859 				  IRQF_SHARED, dev->eq_table.eq[0].irq_name, dev);
860 		if (err)
861 			goto err_out_cmd;
862 		dev->eq_table.have_irq = 1;
863 	}
864 
865 	err = mthca_MAP_EQ(dev, async_mask(dev),
866 			   0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
867 	if (err)
868 		mthca_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
869 			   dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err);
870 	if (status)
871 		mthca_warn(dev, "MAP_EQ for async EQ %d returned status 0x%02x\n",
872 			   dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status);
873 
874 	err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
875 			   0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
876 	if (err)
877 		mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n",
878 			   dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err);
879 	if (status)
880 		mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n",
881 			   dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status);
882 
883 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
884 		if (mthca_is_memfree(dev))
885 			arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
886 		else
887 			tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
888 
889 	return 0;
890 
891 err_out_cmd:
892 	mthca_free_irqs(dev);
893 	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_CMD]);
894 
895 err_out_async:
896 	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
897 
898 err_out_comp:
899 	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_COMP]);
900 
901 err_out_unmap:
902 	mthca_unmap_eq_regs(dev);
903 
904 err_out_free:
905 	mthca_alloc_cleanup(&dev->eq_table.alloc);
906 	return err;
907 }
908 
909 void mthca_cleanup_eq_table(struct mthca_dev *dev)
910 {
911 	u8 status;
912 	int i;
913 
914 	mthca_free_irqs(dev);
915 
916 	mthca_MAP_EQ(dev, async_mask(dev),
917 		     1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
918 	mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
919 		     1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
920 
921 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
922 		mthca_free_eq(dev, &dev->eq_table.eq[i]);
923 
924 	mthca_unmap_eq_regs(dev);
925 
926 	mthca_alloc_cleanup(&dev->eq_table.alloc);
927 }
928