1 /*
2  * Copyright (c) 2013 Intel Corporation. All rights reserved.
3  * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
4  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 
35 #include <linux/spinlock.h>
36 #include <linux/pci.h>
37 #include <linux/io.h>
38 #include <linux/delay.h>
39 #include <linux/netdevice.h>
40 #include <linux/vmalloc.h>
41 #include <linux/module.h>
42 #include <linux/prefetch.h>
43 
44 #include "qib.h"
45 
46 /*
47  * The size has to be longer than this string, so we can append
48  * board/chip information to it in the init code.
49  */
50 const char ib_qib_version[] = QIB_DRIVER_VERSION "\n";
51 
52 DEFINE_SPINLOCK(qib_devs_lock);
53 LIST_HEAD(qib_dev_list);
54 DEFINE_MUTEX(qib_mutex);	/* general driver use */
55 
56 unsigned qib_ibmtu;
57 module_param_named(ibmtu, qib_ibmtu, uint, S_IRUGO);
58 MODULE_PARM_DESC(ibmtu, "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096");
59 
60 unsigned qib_compat_ddr_negotiate = 1;
61 module_param_named(compat_ddr_negotiate, qib_compat_ddr_negotiate, uint,
62 		   S_IWUSR | S_IRUGO);
63 MODULE_PARM_DESC(compat_ddr_negotiate,
64 		 "Attempt pre-IBTA 1.2 DDR speed negotiation");
65 
66 MODULE_LICENSE("Dual BSD/GPL");
67 MODULE_AUTHOR("Intel <ibsupport@intel.com>");
68 MODULE_DESCRIPTION("Intel IB driver");
69 MODULE_VERSION(QIB_DRIVER_VERSION);
70 
71 /*
72  * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our
73  * PIO send buffers.  This is well beyond anything currently
74  * defined in the InfiniBand spec.
75  */
76 #define QIB_PIO_MAXIBHDR 128
77 
78 /*
79  * QIB_MAX_PKT_RCV is the max # if packets processed per receive interrupt.
80  */
81 #define QIB_MAX_PKT_RECV 64
82 
83 struct qlogic_ib_stats qib_stats;
84 
/*
 * Format the name of unit @unit as "infinipath<N>" and return it.
 *
 * The result lives in a single static buffer shared by all callers, so it
 * is overwritten by the next call and the function is not reentrant.  The
 * buffer is sized for the prefix plus the ten digits of the largest 32-bit
 * unsigned value, so the name is never truncated.
 */
const char *qib_get_unit_name(int unit)
{
	/* sizeof the literal covers prefix and NUL; +10 for the digits */
	static char iname[sizeof("infinipath") + 10];

	/* %u takes an unsigned int; make the conversion explicit */
	snprintf(iname, sizeof(iname), "infinipath%u", (unsigned int)unit);
	return iname;
}
92 
93 const char *qib_get_card_name(struct rvt_dev_info *rdi)
94 {
95 	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
96 	struct qib_devdata *dd = container_of(ibdev,
97 					      struct qib_devdata, verbs_dev);
98 	return qib_get_unit_name(dd->unit);
99 }
100 
101 struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi)
102 {
103 	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
104 	struct qib_devdata *dd = container_of(ibdev,
105 					      struct qib_devdata, verbs_dev);
106 	return dd->pcidev;
107 }
108 
109 /*
110  * Return count of units with at least one port ACTIVE.
111  */
112 int qib_count_active_units(void)
113 {
114 	struct qib_devdata *dd;
115 	struct qib_pportdata *ppd;
116 	unsigned long flags;
117 	int pidx, nunits_active = 0;
118 
119 	spin_lock_irqsave(&qib_devs_lock, flags);
120 	list_for_each_entry(dd, &qib_dev_list, list) {
121 		if (!(dd->flags & QIB_PRESENT) || !dd->kregbase)
122 			continue;
123 		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
124 			ppd = dd->pport + pidx;
125 			if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT |
126 					 QIBL_LINKARMED | QIBL_LINKACTIVE))) {
127 				nunits_active++;
128 				break;
129 			}
130 		}
131 	}
132 	spin_unlock_irqrestore(&qib_devs_lock, flags);
133 	return nunits_active;
134 }
135 
136 /*
137  * Return count of all units, optionally return in arguments
138  * the number of usable (present) units, and the number of
139  * ports that are up.
140  */
141 int qib_count_units(int *npresentp, int *nupp)
142 {
143 	int nunits = 0, npresent = 0, nup = 0;
144 	struct qib_devdata *dd;
145 	unsigned long flags;
146 	int pidx;
147 	struct qib_pportdata *ppd;
148 
149 	spin_lock_irqsave(&qib_devs_lock, flags);
150 
151 	list_for_each_entry(dd, &qib_dev_list, list) {
152 		nunits++;
153 		if ((dd->flags & QIB_PRESENT) && dd->kregbase)
154 			npresent++;
155 		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
156 			ppd = dd->pport + pidx;
157 			if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT |
158 					 QIBL_LINKARMED | QIBL_LINKACTIVE)))
159 				nup++;
160 		}
161 	}
162 
163 	spin_unlock_irqrestore(&qib_devs_lock, flags);
164 
165 	if (npresentp)
166 		*npresentp = npresent;
167 	if (nupp)
168 		*nupp = nup;
169 
170 	return nunits;
171 }
172 
173 /**
174  * qib_wait_linkstate - wait for an IB link state change to occur
175  * @dd: the qlogic_ib device
176  * @state: the state to wait for
177  * @msecs: the number of milliseconds to wait
178  *
179  * wait up to msecs milliseconds for IB link state change to occur for
180  * now, take the easy polling route.  Currently used only by
181  * qib_set_linkstate.  Returns 0 if state reached, otherwise
182  * -ETIMEDOUT state can have multiple states set, for any of several
183  * transitions.
184  */
185 int qib_wait_linkstate(struct qib_pportdata *ppd, u32 state, int msecs)
186 {
187 	int ret;
188 	unsigned long flags;
189 
190 	spin_lock_irqsave(&ppd->lflags_lock, flags);
191 	if (ppd->state_wanted) {
192 		spin_unlock_irqrestore(&ppd->lflags_lock, flags);
193 		ret = -EBUSY;
194 		goto bail;
195 	}
196 	ppd->state_wanted = state;
197 	spin_unlock_irqrestore(&ppd->lflags_lock, flags);
198 	wait_event_interruptible_timeout(ppd->state_wait,
199 					 (ppd->lflags & state),
200 					 msecs_to_jiffies(msecs));
201 	spin_lock_irqsave(&ppd->lflags_lock, flags);
202 	ppd->state_wanted = 0;
203 	spin_unlock_irqrestore(&ppd->lflags_lock, flags);
204 
205 	if (!(ppd->lflags & state))
206 		ret = -ETIMEDOUT;
207 	else
208 		ret = 0;
209 bail:
210 	return ret;
211 }
212 
213 int qib_set_linkstate(struct qib_pportdata *ppd, u8 newstate)
214 {
215 	u32 lstate;
216 	int ret;
217 	struct qib_devdata *dd = ppd->dd;
218 	unsigned long flags;
219 
220 	switch (newstate) {
221 	case QIB_IB_LINKDOWN_ONLY:
222 		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
223 				 IB_LINKCMD_DOWN | IB_LINKINITCMD_NOP);
224 		/* don't wait */
225 		ret = 0;
226 		goto bail;
227 
228 	case QIB_IB_LINKDOWN:
229 		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
230 				 IB_LINKCMD_DOWN | IB_LINKINITCMD_POLL);
231 		/* don't wait */
232 		ret = 0;
233 		goto bail;
234 
235 	case QIB_IB_LINKDOWN_SLEEP:
236 		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
237 				 IB_LINKCMD_DOWN | IB_LINKINITCMD_SLEEP);
238 		/* don't wait */
239 		ret = 0;
240 		goto bail;
241 
242 	case QIB_IB_LINKDOWN_DISABLE:
243 		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
244 				 IB_LINKCMD_DOWN | IB_LINKINITCMD_DISABLE);
245 		/* don't wait */
246 		ret = 0;
247 		goto bail;
248 
249 	case QIB_IB_LINKARM:
250 		if (ppd->lflags & QIBL_LINKARMED) {
251 			ret = 0;
252 			goto bail;
253 		}
254 		if (!(ppd->lflags & (QIBL_LINKINIT | QIBL_LINKACTIVE))) {
255 			ret = -EINVAL;
256 			goto bail;
257 		}
258 		/*
259 		 * Since the port can be ACTIVE when we ask for ARMED,
260 		 * clear QIBL_LINKV so we can wait for a transition.
261 		 * If the link isn't ARMED, then something else happened
262 		 * and there is no point waiting for ARMED.
263 		 */
264 		spin_lock_irqsave(&ppd->lflags_lock, flags);
265 		ppd->lflags &= ~QIBL_LINKV;
266 		spin_unlock_irqrestore(&ppd->lflags_lock, flags);
267 		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
268 				 IB_LINKCMD_ARMED | IB_LINKINITCMD_NOP);
269 		lstate = QIBL_LINKV;
270 		break;
271 
272 	case QIB_IB_LINKACTIVE:
273 		if (ppd->lflags & QIBL_LINKACTIVE) {
274 			ret = 0;
275 			goto bail;
276 		}
277 		if (!(ppd->lflags & QIBL_LINKARMED)) {
278 			ret = -EINVAL;
279 			goto bail;
280 		}
281 		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
282 				 IB_LINKCMD_ACTIVE | IB_LINKINITCMD_NOP);
283 		lstate = QIBL_LINKACTIVE;
284 		break;
285 
286 	default:
287 		ret = -EINVAL;
288 		goto bail;
289 	}
290 	ret = qib_wait_linkstate(ppd, lstate, 10);
291 
292 bail:
293 	return ret;
294 }
295 
296 /*
297  * Get address of eager buffer from it's index (allocated in chunks, not
298  * contiguous).
299  */
300 static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail)
301 {
302 	const u32 chunk = etail >> rcd->rcvegrbufs_perchunk_shift;
303 	const u32 idx =  etail & ((u32)rcd->rcvegrbufs_perchunk - 1);
304 
305 	return rcd->rcvegrbuf[chunk] + (idx << rcd->dd->rcvegrbufsize_shift);
306 }
307 
308 /*
309  * Returns 1 if error was a CRC, else 0.
310  * Needed for some chip's synthesized error counters.
311  */
312 static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
313 			  u32 ctxt, u32 eflags, u32 l, u32 etail,
314 			  __le32 *rhf_addr, struct qib_message_header *rhdr)
315 {
316 	u32 ret = 0;
317 
318 	if (eflags & (QLOGIC_IB_RHF_H_ICRCERR | QLOGIC_IB_RHF_H_VCRCERR))
319 		ret = 1;
320 	else if (eflags == QLOGIC_IB_RHF_H_TIDERR) {
321 		/* For TIDERR and RC QPs premptively schedule a NAK */
322 		struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
323 		struct qib_other_headers *ohdr = NULL;
324 		struct qib_ibport *ibp = &ppd->ibport_data;
325 		struct qib_qp *qp = NULL;
326 		u32 tlen = qib_hdrget_length_in_bytes(rhf_addr);
327 		u16 lid  = be16_to_cpu(hdr->lrh[1]);
328 		int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
329 		u32 qp_num;
330 		u32 opcode;
331 		u32 psn;
332 		int diff;
333 
334 		/* Sanity check packet */
335 		if (tlen < 24)
336 			goto drop;
337 
338 		if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
339 			lid &= ~((1 << ppd->lmc) - 1);
340 			if (unlikely(lid != ppd->lid))
341 				goto drop;
342 		}
343 
344 		/* Check for GRH */
345 		if (lnh == QIB_LRH_BTH)
346 			ohdr = &hdr->u.oth;
347 		else if (lnh == QIB_LRH_GRH) {
348 			u32 vtf;
349 
350 			ohdr = &hdr->u.l.oth;
351 			if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
352 				goto drop;
353 			vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
354 			if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
355 				goto drop;
356 		} else
357 			goto drop;
358 
359 		/* Get opcode and PSN from packet */
360 		opcode = be32_to_cpu(ohdr->bth[0]);
361 		opcode >>= 24;
362 		psn = be32_to_cpu(ohdr->bth[2]);
363 
364 		/* Get the destination QP number. */
365 		qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
366 		if (qp_num != QIB_MULTICAST_QPN) {
367 			int ruc_res;
368 
369 			qp = qib_lookup_qpn(ibp, qp_num);
370 			if (!qp)
371 				goto drop;
372 
373 			/*
374 			 * Handle only RC QPs - for other QP types drop error
375 			 * packet.
376 			 */
377 			spin_lock(&qp->r_lock);
378 
379 			/* Check for valid receive state. */
380 			if (!(ib_qib_state_ops[qp->state] &
381 			      QIB_PROCESS_RECV_OK)) {
382 				ibp->n_pkt_drops++;
383 				goto unlock;
384 			}
385 
386 			switch (qp->ibqp.qp_type) {
387 			case IB_QPT_RC:
388 				ruc_res =
389 					qib_ruc_check_hdr(
390 						ibp, hdr,
391 						lnh == QIB_LRH_GRH,
392 						qp,
393 						be32_to_cpu(ohdr->bth[0]));
394 				if (ruc_res)
395 					goto unlock;
396 
397 				/* Only deal with RDMA Writes for now */
398 				if (opcode <
399 				    IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
400 					diff = qib_cmp24(psn, qp->r_psn);
401 					if (!qp->r_nak_state && diff >= 0) {
402 						ibp->n_rc_seqnak++;
403 						qp->r_nak_state =
404 							IB_NAK_PSN_ERROR;
405 						/* Use the expected PSN. */
406 						qp->r_ack_psn = qp->r_psn;
407 						/*
408 						 * Wait to send the sequence
409 						 * NAK until all packets
410 						 * in the receive queue have
411 						 * been processed.
412 						 * Otherwise, we end up
413 						 * propagating congestion.
414 						 */
415 						if (list_empty(&qp->rspwait)) {
416 							qp->r_flags |=
417 								QIB_R_RSP_NAK;
418 							atomic_inc(
419 								&qp->refcount);
420 							list_add_tail(
421 							 &qp->rspwait,
422 							 &rcd->qp_wait_list);
423 						}
424 					} /* Out of sequence NAK */
425 				} /* QP Request NAKs */
426 				break;
427 			case IB_QPT_SMI:
428 			case IB_QPT_GSI:
429 			case IB_QPT_UD:
430 			case IB_QPT_UC:
431 			default:
432 				/* For now don't handle any other QP types */
433 				break;
434 			}
435 
436 unlock:
437 			spin_unlock(&qp->r_lock);
438 			/*
439 			 * Notify qib_destroy_qp() if it is waiting
440 			 * for us to finish.
441 			 */
442 			if (atomic_dec_and_test(&qp->refcount))
443 				wake_up(&qp->wait);
444 		} /* Unicast QP */
445 	} /* Valid packet with TIDErr */
446 
447 drop:
448 	return ret;
449 }
450 
451 /*
452  * qib_kreceive - receive a packet
453  * @rcd: the qlogic_ib context
454  * @llic: gets count of good packets needed to clear lli,
455  *          (used with chips that need need to track crcs for lli)
456  *
457  * called from interrupt handler for errors or receive interrupt
458  * Returns number of CRC error packets, needed by some chips for
459  * local link integrity tracking.   crcs are adjusted down by following
460  * good packets, if any, and count of good packets is also tracked.
461  */
462 u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
463 {
464 	struct qib_devdata *dd = rcd->dd;
465 	struct qib_pportdata *ppd = rcd->ppd;
466 	__le32 *rhf_addr;
467 	void *ebuf;
468 	const u32 rsize = dd->rcvhdrentsize;        /* words */
469 	const u32 maxcnt = dd->rcvhdrcnt * rsize;   /* words */
470 	u32 etail = -1, l, hdrqtail;
471 	struct qib_message_header *hdr;
472 	u32 eflags, etype, tlen, i = 0, updegr = 0, crcs = 0;
473 	int last;
474 	u64 lval;
475 	struct qib_qp *qp, *nqp;
476 
477 	l = rcd->head;
478 	rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
479 	if (dd->flags & QIB_NODMA_RTAIL) {
480 		u32 seq = qib_hdrget_seq(rhf_addr);
481 
482 		if (seq != rcd->seq_cnt)
483 			goto bail;
484 		hdrqtail = 0;
485 	} else {
486 		hdrqtail = qib_get_rcvhdrtail(rcd);
487 		if (l == hdrqtail)
488 			goto bail;
489 		smp_rmb();  /* prevent speculative reads of dma'ed hdrq */
490 	}
491 
492 	for (last = 0, i = 1; !last; i += !last) {
493 		hdr = dd->f_get_msgheader(dd, rhf_addr);
494 		eflags = qib_hdrget_err_flags(rhf_addr);
495 		etype = qib_hdrget_rcv_type(rhf_addr);
496 		/* total length */
497 		tlen = qib_hdrget_length_in_bytes(rhf_addr);
498 		ebuf = NULL;
499 		if ((dd->flags & QIB_NODMA_RTAIL) ?
500 		    qib_hdrget_use_egr_buf(rhf_addr) :
501 		    (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
502 			etail = qib_hdrget_index(rhf_addr);
503 			updegr = 1;
504 			if (tlen > sizeof(*hdr) ||
505 			    etype >= RCVHQ_RCV_TYPE_NON_KD) {
506 				ebuf = qib_get_egrbuf(rcd, etail);
507 				prefetch_range(ebuf, tlen - sizeof(*hdr));
508 			}
509 		}
510 		if (!eflags) {
511 			u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2;
512 
513 			if (lrh_len != tlen) {
514 				qib_stats.sps_lenerrs++;
515 				goto move_along;
516 			}
517 		}
518 		if (etype == RCVHQ_RCV_TYPE_NON_KD && !eflags &&
519 		    ebuf == NULL &&
520 		    tlen > (dd->rcvhdrentsize - 2 + 1 -
521 				qib_hdrget_offset(rhf_addr)) << 2) {
522 			goto move_along;
523 		}
524 
525 		/*
526 		 * Both tiderr and qibhdrerr are set for all plain IB
527 		 * packets; only qibhdrerr should be set.
528 		 */
529 		if (unlikely(eflags))
530 			crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l,
531 					       etail, rhf_addr, hdr);
532 		else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
533 			qib_ib_rcv(rcd, hdr, ebuf, tlen);
534 			if (crcs)
535 				crcs--;
536 			else if (llic && *llic)
537 				--*llic;
538 		}
539 move_along:
540 		l += rsize;
541 		if (l >= maxcnt)
542 			l = 0;
543 		if (i == QIB_MAX_PKT_RECV)
544 			last = 1;
545 
546 		rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
547 		if (dd->flags & QIB_NODMA_RTAIL) {
548 			u32 seq = qib_hdrget_seq(rhf_addr);
549 
550 			if (++rcd->seq_cnt > 13)
551 				rcd->seq_cnt = 1;
552 			if (seq != rcd->seq_cnt)
553 				last = 1;
554 		} else if (l == hdrqtail)
555 			last = 1;
556 		/*
557 		 * Update head regs etc., every 16 packets, if not last pkt,
558 		 * to help prevent rcvhdrq overflows, when many packets
559 		 * are processed and queue is nearly full.
560 		 * Don't request an interrupt for intermediate updates.
561 		 */
562 		lval = l;
563 		if (!last && !(i & 0xf)) {
564 			dd->f_update_usrhead(rcd, lval, updegr, etail, i);
565 			updegr = 0;
566 		}
567 	}
568 	/*
569 	 * Notify qib_destroy_qp() if it is waiting
570 	 * for lookaside_qp to finish.
571 	 */
572 	if (rcd->lookaside_qp) {
573 		if (atomic_dec_and_test(&rcd->lookaside_qp->refcount))
574 			wake_up(&rcd->lookaside_qp->wait);
575 		rcd->lookaside_qp = NULL;
576 	}
577 
578 	rcd->head = l;
579 
580 	/*
581 	 * Iterate over all QPs waiting to respond.
582 	 * The list won't change since the IRQ is only run on one CPU.
583 	 */
584 	list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
585 		list_del_init(&qp->rspwait);
586 		if (qp->r_flags & QIB_R_RSP_NAK) {
587 			qp->r_flags &= ~QIB_R_RSP_NAK;
588 			qib_send_rc_ack(qp);
589 		}
590 		if (qp->r_flags & QIB_R_RSP_SEND) {
591 			unsigned long flags;
592 
593 			qp->r_flags &= ~QIB_R_RSP_SEND;
594 			spin_lock_irqsave(&qp->s_lock, flags);
595 			if (ib_qib_state_ops[qp->state] &
596 					QIB_PROCESS_OR_FLUSH_SEND)
597 				qib_schedule_send(qp);
598 			spin_unlock_irqrestore(&qp->s_lock, flags);
599 		}
600 		if (atomic_dec_and_test(&qp->refcount))
601 			wake_up(&qp->wait);
602 	}
603 
604 bail:
605 	/* Report number of packets consumed */
606 	if (npkts)
607 		*npkts = i;
608 
609 	/*
610 	 * Always write head at end, and setup rcv interrupt, even
611 	 * if no packets were processed.
612 	 */
613 	lval = (u64)rcd->head | dd->rhdrhead_intr_off;
614 	dd->f_update_usrhead(rcd, lval, updegr, etail, i);
615 	return crcs;
616 }
617 
618 /**
619  * qib_set_mtu - set the MTU
620  * @ppd: the perport data
621  * @arg: the new MTU
622  *
623  * We can handle "any" incoming size, the issue here is whether we
624  * need to restrict our outgoing size.   For now, we don't do any
625  * sanity checking on this, and we don't deal with what happens to
626  * programs that are already running when the size changes.
627  * NOTE: changing the MTU will usually cause the IBC to go back to
628  * link INIT state...
629  */
630 int qib_set_mtu(struct qib_pportdata *ppd, u16 arg)
631 {
632 	u32 piosize;
633 	int ret, chk;
634 
635 	if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
636 	    arg != 4096) {
637 		ret = -EINVAL;
638 		goto bail;
639 	}
640 	chk = ib_mtu_enum_to_int(qib_ibmtu);
641 	if (chk > 0 && arg > chk) {
642 		ret = -EINVAL;
643 		goto bail;
644 	}
645 
646 	piosize = ppd->ibmaxlen;
647 	ppd->ibmtu = arg;
648 
649 	if (arg >= (piosize - QIB_PIO_MAXIBHDR)) {
650 		/* Only if it's not the initial value (or reset to it) */
651 		if (piosize != ppd->init_ibmaxlen) {
652 			if (arg > piosize && arg <= ppd->init_ibmaxlen)
653 				piosize = ppd->init_ibmaxlen - 2 * sizeof(u32);
654 			ppd->ibmaxlen = piosize;
655 		}
656 	} else if ((arg + QIB_PIO_MAXIBHDR) != ppd->ibmaxlen) {
657 		piosize = arg + QIB_PIO_MAXIBHDR - 2 * sizeof(u32);
658 		ppd->ibmaxlen = piosize;
659 	}
660 
661 	ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_MTU, 0);
662 
663 	ret = 0;
664 
665 bail:
666 	return ret;
667 }
668 
669 int qib_set_lid(struct qib_pportdata *ppd, u32 lid, u8 lmc)
670 {
671 	struct qib_devdata *dd = ppd->dd;
672 
673 	ppd->lid = lid;
674 	ppd->lmc = lmc;
675 
676 	dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LIDLMC,
677 			 lid | (~((1U << lmc) - 1)) << 16);
678 
679 	qib_devinfo(dd->pcidev, "IB%u:%u got a lid: 0x%x\n",
680 		    dd->unit, ppd->port, lid);
681 
682 	return 0;
683 }
684 
685 /*
686  * Following deal with the "obviously simple" task of overriding the state
687  * of the LEDS, which normally indicate link physical and logical status.
688  * The complications arise in dealing with different hardware mappings
689  * and the board-dependent routine being called from interrupts.
690  * and then there's the requirement to _flash_ them.
691  */
692 #define LED_OVER_FREQ_SHIFT 8
693 #define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
694 /* Below is "non-zero" to force override, but both actual LEDs are off */
695 #define LED_OVER_BOTH_OFF (8)
696 
697 static void qib_run_led_override(unsigned long opaque)
698 {
699 	struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
700 	struct qib_devdata *dd = ppd->dd;
701 	int timeoff;
702 	int ph_idx;
703 
704 	if (!(dd->flags & QIB_INITTED))
705 		return;
706 
707 	ph_idx = ppd->led_override_phase++ & 1;
708 	ppd->led_override = ppd->led_override_vals[ph_idx];
709 	timeoff = ppd->led_override_timeoff;
710 
711 	dd->f_setextled(ppd, 1);
712 	/*
713 	 * don't re-fire the timer if user asked for it to be off; we let
714 	 * it fire one more time after they turn it off to simplify
715 	 */
716 	if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
717 		mod_timer(&ppd->led_override_timer, jiffies + timeoff);
718 }
719 
720 void qib_set_led_override(struct qib_pportdata *ppd, unsigned int val)
721 {
722 	struct qib_devdata *dd = ppd->dd;
723 	int timeoff, freq;
724 
725 	if (!(dd->flags & QIB_INITTED))
726 		return;
727 
728 	/* First check if we are blinking. If not, use 1HZ polling */
729 	timeoff = HZ;
730 	freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
731 
732 	if (freq) {
733 		/* For blink, set each phase from one nybble of val */
734 		ppd->led_override_vals[0] = val & 0xF;
735 		ppd->led_override_vals[1] = (val >> 4) & 0xF;
736 		timeoff = (HZ << 4)/freq;
737 	} else {
738 		/* Non-blink set both phases the same. */
739 		ppd->led_override_vals[0] = val & 0xF;
740 		ppd->led_override_vals[1] = val & 0xF;
741 	}
742 	ppd->led_override_timeoff = timeoff;
743 
744 	/*
745 	 * If the timer has not already been started, do so. Use a "quick"
746 	 * timeout so the function will be called soon, to look at our request.
747 	 */
748 	if (atomic_inc_return(&ppd->led_override_timer_active) == 1) {
749 		/* Need to start timer */
750 		init_timer(&ppd->led_override_timer);
751 		ppd->led_override_timer.function = qib_run_led_override;
752 		ppd->led_override_timer.data = (unsigned long) ppd;
753 		ppd->led_override_timer.expires = jiffies + 1;
754 		add_timer(&ppd->led_override_timer);
755 	} else {
756 		if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
757 			mod_timer(&ppd->led_override_timer, jiffies + 1);
758 		atomic_dec(&ppd->led_override_timer_active);
759 	}
760 }
761 
762 /**
763  * qib_reset_device - reset the chip if possible
764  * @unit: the device to reset
765  *
766  * Whether or not reset is successful, we attempt to re-initialize the chip
767  * (that is, much like a driver unload/reload).  We clear the INITTED flag
768  * so that the various entry points will fail until we reinitialize.  For
769  * now, we only allow this if no user contexts are open that use chip resources
770  */
771 int qib_reset_device(int unit)
772 {
773 	int ret, i;
774 	struct qib_devdata *dd = qib_lookup(unit);
775 	struct qib_pportdata *ppd;
776 	unsigned long flags;
777 	int pidx;
778 
779 	if (!dd) {
780 		ret = -ENODEV;
781 		goto bail;
782 	}
783 
784 	qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit);
785 
786 	if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) {
787 		qib_devinfo(dd->pcidev,
788 			"Invalid unit number %u or not initialized or not present\n",
789 			unit);
790 		ret = -ENXIO;
791 		goto bail;
792 	}
793 
794 	spin_lock_irqsave(&dd->uctxt_lock, flags);
795 	if (dd->rcd)
796 		for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
797 			if (!dd->rcd[i] || !dd->rcd[i]->cnt)
798 				continue;
799 			spin_unlock_irqrestore(&dd->uctxt_lock, flags);
800 			ret = -EBUSY;
801 			goto bail;
802 		}
803 	spin_unlock_irqrestore(&dd->uctxt_lock, flags);
804 
805 	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
806 		ppd = dd->pport + pidx;
807 		if (atomic_read(&ppd->led_override_timer_active)) {
808 			/* Need to stop LED timer, _then_ shut off LEDs */
809 			del_timer_sync(&ppd->led_override_timer);
810 			atomic_set(&ppd->led_override_timer_active, 0);
811 		}
812 
813 		/* Shut off LEDs after we are sure timer is not running */
814 		ppd->led_override = LED_OVER_BOTH_OFF;
815 		dd->f_setextled(ppd, 0);
816 		if (dd->flags & QIB_HAS_SEND_DMA)
817 			qib_teardown_sdma(ppd);
818 	}
819 
820 	ret = dd->f_reset(dd);
821 	if (ret == 1)
822 		ret = qib_init(dd, 1);
823 	else
824 		ret = -EAGAIN;
825 	if (ret)
826 		qib_dev_err(dd,
827 			"Reinitialize unit %u after reset failed with %d\n",
828 			unit, ret);
829 	else
830 		qib_devinfo(dd->pcidev,
831 			"Reinitialized unit %u after resetting\n",
832 			unit);
833 
834 bail:
835 	return ret;
836 }
837