xref: /openbmc/linux/drivers/infiniband/hw/hfi1/mad.c (revision 41e4b7dc)
1 /*
2  * Copyright(c) 2015-2017 Intel Corporation.
3  *
4  * This file is provided under a dual BSD/GPLv2 license.  When using or
5  * redistributing this file, you may do so under either license.
6  *
7  * GPL LICENSE SUMMARY
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of version 2 of the GNU General Public License as
11  * published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * BSD LICENSE
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  *
24  *  - Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  *  - Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in
28  *    the documentation and/or other materials provided with the
29  *    distribution.
30  *  - Neither the name of Intel Corporation nor the names of its
31  *    contributors may be used to endorse or promote products derived
32  *    from this software without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  *
46  */
47 
48 #include <linux/net.h>
49 #include <rdma/opa_addr.h>
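/*
 * Number of pkey table blocks that fit in the data payload of one
 * directed-route SMP; each block holds OPA_PARTITION_TABLE_BLK_SIZE
 * 16-bit pkeys (derived directly from the macro below).
 */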
50 #define OPA_NUM_PKEY_BLOCKS_PER_SMP (OPA_SMP_DR_DATA_SIZE \
51 			/ (OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16)))
52 
53 #include "hfi.h"
54 #include "mad.h"
55 #include "trace.h"
56 #include "qp.h"
57 #include "vnic.h"
58 
59 /* the reset value from the FM is supposed to be 0xffff, handle both */
60 #define OPA_LINK_WIDTH_RESET_OLD 0x0fff
61 #define OPA_LINK_WIDTH_RESET 0xffff
62 
63 struct trap_node {
64 	struct list_head list;
65 	struct opa_mad_notice_attr data;
66 	__be64 tid;
67 	int len;
68 	u32 retry;
69 	u8 in_use;
70 	u8 repress;
71 };
72 
73 static int smp_length_check(u32 data_size, u32 request_len)
74 {
75 	if (unlikely(request_len < data_size))
76 		return -EINVAL;
77 
78 	return 0;
79 }
80 
81 static int reply(struct ib_mad_hdr *smp)
82 {
83 	/*
84 	 * The verbs framework will handle the directed/LID route
85 	 * packet changes.
86 	 */
87 	smp->method = IB_MGMT_METHOD_GET_RESP;
88 	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
89 		smp->status |= IB_SMP_DIRECTION;
90 	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
91 }
92 
93 static inline void clear_opa_smp_data(struct opa_smp *smp)
94 {
95 	void *data = opa_get_smp_data(smp);
96 	size_t size = opa_get_smp_data_size(smp);
97 
98 	memset(data, 0, size);
99 }
100 
101 void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port)
102 {
103 	struct ib_event event;
104 
105 	event.event = IB_EVENT_PKEY_CHANGE;
106 	event.device = &dd->verbs_dev.rdi.ibdev;
107 	event.element.port_num = port;
108 	ib_dispatch_event(&event);
109 }
110 
111 /*
112  * If the port is down, clean up all pending traps.  We need to be careful
113  * with the given trap, because it may be queued.
114  */
115 static void cleanup_traps(struct hfi1_ibport *ibp, struct trap_node *trap)
116 {
117 	struct trap_node *node, *q;
118 	unsigned long flags;
119 	struct list_head trap_list;
120 	int i;
121 
122 	for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) {
123 		spin_lock_irqsave(&ibp->rvp.lock, flags);
124 		list_replace_init(&ibp->rvp.trap_lists[i].list, &trap_list);
125 		ibp->rvp.trap_lists[i].list_len = 0;
126 		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
127 
128 		/*
129 		 * Remove all items from the list, freeing all the non-given
130 		 * traps.
131 		 */
132 		list_for_each_entry_safe(node, q, &trap_list, list) {
133 			list_del(&node->list);
134 			if (node != trap)
135 				kfree(node);
136 		}
137 	}
138 
139 	/*
140 	 * The loop above never frees the given trap; whether or not it was
141 	 * on one of the lists, it is now safe to free it here.
142 	 */
143 	kfree(trap);
144 }
145 
146 static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp,
147 					    struct trap_node *trap)
148 {
149 	struct trap_node *node;
150 	struct trap_list *trap_list;
151 	unsigned long flags;
152 	unsigned long timeout;
153 	int found = 0;
154 	unsigned int queue_id;
155 	static int trap_count;
156 
157 	queue_id = trap->data.generic_type & 0x0F;
158 	if (queue_id >= RVT_MAX_TRAP_LISTS) {
159 		trap_count++;
160 		pr_err_ratelimited("hfi1: Invalid trap 0x%0x dropped. Total dropped: %d\n",
161 				   trap->data.generic_type, trap_count);
162 		kfree(trap);
163 		return NULL;
164 	}
165 
166 	/*
167 	 * Since the retry (handle timeout) does not remove a trap request
168 	 * from the list, all we have to do is compare the node.
169 	 */
170 	spin_lock_irqsave(&ibp->rvp.lock, flags);
171 	trap_list = &ibp->rvp.trap_lists[queue_id];
172 
173 	list_for_each_entry(node, &trap_list->list, list) {
174 		if (node == trap) {
175 			node->retry++;
176 			found = 1;
177 			break;
178 		}
179 	}
180 
181 	/* If it is not on the list, add it, limited to RVT_MAX_TRAP_LEN. */
182 	if (!found) {
183 		if (trap_list->list_len < RVT_MAX_TRAP_LEN) {
184 			trap_list->list_len++;
185 			list_add_tail(&trap->list, &trap_list->list);
186 		} else {
187 			pr_warn_ratelimited("hfi1: Maximum trap limit reached for 0x%0x traps\n",
188 					    trap->data.generic_type);
189 			kfree(trap);
190 		}
191 	}
192 
193 	/*
194 	 * Next check to see if there is a timer pending.  If not, set it up
195 	 * and get the first trap from the list.
196 	 */
197 	node = NULL;
198 	if (!timer_pending(&ibp->rvp.trap_timer)) {
199 		/*
200 		 * o14-2
201 		 * If the timeout is set, we have to wait until it expires
202 		 * before the trap can be sent.
203 		 * This should be > RVT_TRAP_TIMEOUT
204 		 */
205 		timeout = (RVT_TRAP_TIMEOUT *
206 			   (1UL << ibp->rvp.subnet_timeout)) / 1000;
207 		mod_timer(&ibp->rvp.trap_timer,
208 			  jiffies + usecs_to_jiffies(timeout));
209 		node = list_first_entry(&trap_list->list, struct trap_node,
210 					list);
211 		node->in_use = 1;
212 	}
213 	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
214 
215 	return node;
216 }
217 
218 static void subn_handle_opa_trap_repress(struct hfi1_ibport *ibp,
219 					 struct opa_smp *smp)
220 {
221 	struct trap_list *trap_list;
222 	struct trap_node *trap;
223 	unsigned long flags;
224 	int i;
225 
226 	if (smp->attr_id != IB_SMP_ATTR_NOTICE)
227 		return;
228 
229 	spin_lock_irqsave(&ibp->rvp.lock, flags);
230 	for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) {
231 		trap_list = &ibp->rvp.trap_lists[i];
232 		trap = list_first_entry_or_null(&trap_list->list,
233 						struct trap_node, list);
234 		if (trap && trap->tid == smp->tid) {
235 			if (trap->in_use) {
236 				trap->repress = 1;
237 			} else {
238 				trap_list->list_len--;
239 				list_del(&trap->list);
240 				kfree(trap);
241 			}
242 			break;
243 		}
244 	}
245 	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
246 }
247 
248 static void hfi1_update_sm_ah_attr(struct hfi1_ibport *ibp,
249 				   struct rdma_ah_attr *attr, u32 dlid)
250 {
251 	rdma_ah_set_dlid(attr, dlid);
252 	rdma_ah_set_port_num(attr, ppd_from_ibp(ibp)->port);
253 	if (dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
254 		struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
255 
256 		rdma_ah_set_ah_flags(attr, IB_AH_GRH);
257 		grh->sgid_index = 0;
258 		grh->hop_limit = 1;
259 		grh->dgid.global.subnet_prefix =
260 			ibp->rvp.gid_prefix;
261 		grh->dgid.global.interface_id = OPA_MAKE_ID(dlid);
262 	}
263 }
264 
265 static int hfi1_modify_qp0_ah(struct hfi1_ibport *ibp,
266 			      struct rvt_ah *ah, u32 dlid)
267 {
268 	struct rdma_ah_attr attr;
269 	struct rvt_qp *qp0;
270 	int ret = -EINVAL;
271 
272 	memset(&attr, 0, sizeof(attr));
273 	attr.type = ah->ibah.type;
274 	hfi1_update_sm_ah_attr(ibp, &attr, dlid);
275 	rcu_read_lock();
276 	qp0 = rcu_dereference(ibp->rvp.qp[0]);
277 	if (qp0)
278 		ret = rdma_modify_ah(&ah->ibah, &attr);
279 	rcu_read_unlock();
280 	return ret;
281 }
282 
283 static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid)
284 {
285 	struct rdma_ah_attr attr;
286 	struct ib_ah *ah = ERR_PTR(-EINVAL);
287 	struct rvt_qp *qp0;
288 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
289 	struct hfi1_devdata *dd = dd_from_ppd(ppd);
290 	u8 port_num = ppd->port;
291 
292 	memset(&attr, 0, sizeof(attr));
293 	attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
294 	hfi1_update_sm_ah_attr(ibp, &attr, dlid);
295 	rcu_read_lock();
296 	qp0 = rcu_dereference(ibp->rvp.qp[0]);
297 	if (qp0)
298 		ah = rdma_create_ah(qp0->ibqp.pd, &attr);
299 	rcu_read_unlock();
300 	return ah;
301 }
302 
303 static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap)
304 {
305 	struct ib_mad_send_buf *send_buf;
306 	struct ib_mad_agent *agent;
307 	struct opa_smp *smp;
308 	unsigned long flags;
309 	int pkey_idx;
310 	u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;
311 
312 	agent = ibp->rvp.send_agent;
313 	if (!agent) {
314 		cleanup_traps(ibp, trap);
315 		return;
316 	}
317 
318 	/* o14-3.2.1 */
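	/* Traps are only sent while the port's logical state is Active. */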
319 	if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) {
320 		cleanup_traps(ibp, trap);
321 		return;
322 	}
323 
324 	/* Add the trap to the list if necessary and see if we can send it */
325 	trap = check_and_add_trap(ibp, trap);
326 	if (!trap)
327 		return;
328 
329 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
330 	if (pkey_idx < 0) {
331 		pr_warn("%s: failed to find limited mgmt pkey, defaulting 0x%x\n",
332 			__func__, hfi1_get_pkey(ibp, 1));
333 		pkey_idx = 1;
334 	}
335 
336 	send_buf = ib_create_send_mad(agent, qpn, pkey_idx, 0,
337 				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
338 				      GFP_ATOMIC, IB_MGMT_BASE_VERSION);
339 	if (IS_ERR(send_buf))
340 		return;
341 
342 	smp = send_buf->mad;
343 	smp->base_version = OPA_MGMT_BASE_VERSION;
344 	smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
345 	smp->class_version = OPA_SM_CLASS_VERSION;
346 	smp->method = IB_MGMT_METHOD_TRAP;
347 
348 	/* Only update the transaction ID for new traps (o13-5). */
349 	if (trap->tid == 0) {
350 		ibp->rvp.tid++;
351 		/* make sure that tid != 0 */
352 		if (ibp->rvp.tid == 0)
353 			ibp->rvp.tid++;
354 		trap->tid = cpu_to_be64(ibp->rvp.tid);
355 	}
356 	smp->tid = trap->tid;
357 
358 	smp->attr_id = IB_SMP_ATTR_NOTICE;
359 	/* o14-1: smp->mkey = 0; */
360 
361 	memcpy(smp->route.lid.data, &trap->data, trap->len);
362 
363 	spin_lock_irqsave(&ibp->rvp.lock, flags);
364 	if (!ibp->rvp.sm_ah) {
365 		if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
366 			struct ib_ah *ah;
367 
368 			ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
369 			if (IS_ERR(ah)) {
370 				spin_unlock_irqrestore(&ibp->rvp.lock, flags);
371 				return;
372 			}
373 			send_buf->ah = ah;
374 			ibp->rvp.sm_ah = ibah_to_rvtah(ah);
375 		} else {
376 			spin_unlock_irqrestore(&ibp->rvp.lock, flags);
377 			return;
378 		}
379 	} else {
380 		send_buf->ah = &ibp->rvp.sm_ah->ibah;
381 	}
382 
383 	/*
384 	 * If the trap was repressed while things were getting set up, don't
385 	 * bother sending it. This could happen for a retry.
386 	 */
387 	if (trap->repress) {
388 		list_del(&trap->list);
389 		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
390 		kfree(trap);
391 		ib_free_send_mad(send_buf);
392 		return;
393 	}
394 
395 	trap->in_use = 0;
396 	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
397 
398 	if (ib_post_send_mad(send_buf, NULL))
399 		ib_free_send_mad(send_buf);
400 }
401 
402 void hfi1_handle_trap_timer(unsigned long data)
403 {
404 	struct hfi1_ibport *ibp = (struct hfi1_ibport *)data;
405 	struct trap_node *trap = NULL;
406 	unsigned long flags;
407 	int i;
408 
409 	/* Find the trap with the highest priority */
410 	spin_lock_irqsave(&ibp->rvp.lock, flags);
411 	for (i = 0; !trap && i < RVT_MAX_TRAP_LISTS; i++) {
412 		trap = list_first_entry_or_null(&ibp->rvp.trap_lists[i].list,
413 						struct trap_node, list);
414 	}
415 	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
416 
417 	if (trap)
418 		send_trap(ibp, trap);
419 }
420 
421 static struct trap_node *create_trap_node(u8 type, __be16 trap_num, u32 lid)
422 {
423 	struct trap_node *trap;
424 
425 	trap = kzalloc(sizeof(*trap), GFP_ATOMIC);
426 	if (!trap)
427 		return NULL;
428 
429 	INIT_LIST_HEAD(&trap->list);
430 	trap->data.generic_type = type;
431 	trap->data.prod_type_lsb = IB_NOTICE_PROD_CA;
432 	trap->data.trap_num = trap_num;
433 	trap->data.issuer_lid = cpu_to_be32(lid);
434 
435 	return trap;
436 }
437 
438 /*
439  * Send a bad P_Key trap (ch. 14.3.8).
440  */
441 void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl,
442 		   u32 qp1, u32 qp2, u32 lid1, u32 lid2)
443 {
444 	struct trap_node *trap;
445 	u32 lid = ppd_from_ibp(ibp)->lid;
446 
447 	ibp->rvp.n_pkt_drops++;
448 	ibp->rvp.pkey_violations++;
449 
450 	trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_P_KEY,
451 				lid);
452 	if (!trap)
453 		return;
454 
455 	/* Send violation trap */
456 	trap->data.ntc_257_258.lid1 = cpu_to_be32(lid1);
457 	trap->data.ntc_257_258.lid2 = cpu_to_be32(lid2);
458 	trap->data.ntc_257_258.key = cpu_to_be32(key);
459 	trap->data.ntc_257_258.sl = sl << 3;
460 	trap->data.ntc_257_258.qp1 = cpu_to_be32(qp1);
461 	trap->data.ntc_257_258.qp2 = cpu_to_be32(qp2);
462 
463 	trap->len = sizeof(trap->data);
464 	send_trap(ibp, trap);
465 }
466 
467 /*
468  * Send a bad M_Key trap (ch. 14.3.9).
469  */
470 static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
471 		     __be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt)
472 {
473 	struct trap_node *trap;
474 	u32 lid = ppd_from_ibp(ibp)->lid;
475 
476 	trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_M_KEY,
477 				lid);
478 	if (!trap)
479 		return;
480 
481 	/* Send violation trap */
482 	trap->data.ntc_256.lid = trap->data.issuer_lid;
483 	trap->data.ntc_256.method = mad->method;
484 	trap->data.ntc_256.attr_id = mad->attr_id;
485 	trap->data.ntc_256.attr_mod = mad->attr_mod;
486 	trap->data.ntc_256.mkey = mkey;
487 	if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
488 		trap->data.ntc_256.dr_slid = dr_slid;
489 		trap->data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
490 		if (hop_cnt > ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path)) {
491 			trap->data.ntc_256.dr_trunc_hop |=
492 				IB_NOTICE_TRAP_DR_TRUNC;
493 			hop_cnt = ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path);
494 		}
495 		trap->data.ntc_256.dr_trunc_hop |= hop_cnt;
496 		memcpy(trap->data.ntc_256.dr_rtn_path, return_path,
497 		       hop_cnt);
498 	}
499 
500 	trap->len = sizeof(trap->data);
501 
502 	send_trap(ibp, trap);
503 }
504 
505 /*
506  * Send a Port Capability Mask Changed trap (ch. 14.3.11).
507  */
508 void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
509 {
510 	struct trap_node *trap;
511 	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
512 	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
513 	struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
514 	u32 lid = ppd_from_ibp(ibp)->lid;
515 
516 	trap = create_trap_node(IB_NOTICE_TYPE_INFO,
517 				OPA_TRAP_CHANGE_CAPABILITY,
518 				lid);
519 	if (!trap)
520 		return;
521 
522 	trap->data.ntc_144.lid = trap->data.issuer_lid;
523 	trap->data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
524 	trap->data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags);
525 
526 	trap->len = sizeof(trap->data);
527 	send_trap(ibp, trap);
528 }
529 
530 /*
531  * Send a System Image GUID Changed trap (ch. 14.3.12).
532  */
533 void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
534 {
535 	struct trap_node *trap;
536 	u32 lid = ppd_from_ibp(ibp)->lid;
537 
538 	trap = create_trap_node(IB_NOTICE_TYPE_INFO, OPA_TRAP_CHANGE_SYSGUID,
539 				lid);
540 	if (!trap)
541 		return;
542 
543 	trap->data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
544 	trap->data.ntc_145.lid = trap->data.issuer_lid;
545 
546 	trap->len = sizeof(trap->data);
547 	send_trap(ibp, trap);
548 }
549 
550 /*
551  * Send a Node Description Changed trap (ch. 14.3.13).
552  */
553 void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
554 {
555 	struct trap_node *trap;
556 	u32 lid = ppd_from_ibp(ibp)->lid;
557 
558 	trap = create_trap_node(IB_NOTICE_TYPE_INFO,
559 				OPA_TRAP_CHANGE_CAPABILITY,
560 				lid);
561 	if (!trap)
562 		return;
563 
564 	trap->data.ntc_144.lid = trap->data.issuer_lid;
565 	trap->data.ntc_144.change_flags =
566 		cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG);
567 
568 	trap->len = sizeof(trap->data);
569 	send_trap(ibp, trap);
570 }
571 
572 static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
573 				   u8 *data, struct ib_device *ibdev,
574 				   u8 port, u32 *resp_len, u32 max_len)
575 {
576 	struct opa_node_description *nd;
577 
578 	if (am || smp_length_check(sizeof(*nd), max_len)) {
579 		smp->status |= IB_SMP_INVALID_FIELD;
580 		return reply((struct ib_mad_hdr *)smp);
581 	}
582 
583 	nd = (struct opa_node_description *)data;
584 
585 	memcpy(nd->data, ibdev->node_desc, sizeof(nd->data));
586 
587 	if (resp_len)
588 		*resp_len += sizeof(*nd);
589 
590 	return reply((struct ib_mad_hdr *)smp);
591 }
592 
593 static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
594 				   struct ib_device *ibdev, u8 port,
595 				   u32 *resp_len, u32 max_len)
596 {
597 	struct opa_node_info *ni;
598 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
599 	unsigned pidx = port - 1; /* IB numbers ports from 1, hw from 0 */
600 
601 	ni = (struct opa_node_info *)data;
602 
603 	/* GUID 0 is illegal */
604 	if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
605 	    smp_length_check(sizeof(*ni), max_len) ||
606 	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
607 		smp->status |= IB_SMP_INVALID_FIELD;
608 		return reply((struct ib_mad_hdr *)smp);
609 	}
610 
611 	ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
612 	ni->base_version = OPA_MGMT_BASE_VERSION;
613 	ni->class_version = OPA_SM_CLASS_VERSION;
614 	ni->node_type = 1;     /* channel adapter */
615 	ni->num_ports = ibdev->phys_port_cnt;
616 	/* This is already in network order */
617 	ni->system_image_guid = ib_hfi1_sys_image_guid;
618 	ni->node_guid = ibdev->node_guid;
619 	ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
620 	ni->device_id = cpu_to_be16(dd->pcidev->device);
621 	ni->revision = cpu_to_be32(dd->minrev);
622 	ni->local_port_num = port;
623 	ni->vendor_id[0] = dd->oui1;
624 	ni->vendor_id[1] = dd->oui2;
625 	ni->vendor_id[2] = dd->oui3;
626 
627 	if (resp_len)
628 		*resp_len += sizeof(*ni);
629 
630 	return reply((struct ib_mad_hdr *)smp);
631 }
632 
633 static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
634 			     u8 port)
635 {
636 	struct ib_node_info *nip = (struct ib_node_info *)&smp->data;
637 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
638 	unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */
639 
640 	/* GUID 0 is illegal */
641 	if (smp->attr_mod || pidx >= dd->num_pports ||
642 	    ibdev->node_guid == 0 ||
643 	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
644 		smp->status |= IB_SMP_INVALID_FIELD;
645 		return reply((struct ib_mad_hdr *)smp);
646 	}
647 
648 	nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
649 	nip->base_version = OPA_MGMT_BASE_VERSION;
650 	nip->class_version = OPA_SM_CLASS_VERSION;
651 	nip->node_type = 1;     /* channel adapter */
652 	nip->num_ports = ibdev->phys_port_cnt;
653 	/* This is already in network order */
654 	nip->sys_guid = ib_hfi1_sys_image_guid;
655 	nip->node_guid = ibdev->node_guid;
656 	nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
657 	nip->device_id = cpu_to_be16(dd->pcidev->device);
658 	nip->revision = cpu_to_be32(dd->minrev);
659 	nip->local_port_num = port;
660 	nip->vendor_id[0] = dd->oui1;
661 	nip->vendor_id[1] = dd->oui2;
662 	nip->vendor_id[2] = dd->oui3;
663 
664 	return reply((struct ib_mad_hdr *)smp);
665 }
666 
667 static void set_link_width_enabled(struct hfi1_pportdata *ppd, u32 w)
668 {
669 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_ENB, w);
670 }
671 
672 static void set_link_width_downgrade_enabled(struct hfi1_pportdata *ppd, u32 w)
673 {
674 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_DG_ENB, w);
675 }
676 
677 static void set_link_speed_enabled(struct hfi1_pportdata *ppd, u32 s)
678 {
679 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_SPD_ENB, s);
680 }
681 
682 static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
683 		      int mad_flags, __be64 mkey, __be32 dr_slid,
684 		      u8 return_path[], u8 hop_cnt)
685 {
686 	int valid_mkey = 0;
687 	int ret = 0;
688 
689 	/* Is the mkey in the process of expiring? */
690 	if (ibp->rvp.mkey_lease_timeout &&
691 	    time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
692 		/* Clear timeout and mkey protection field. */
693 		ibp->rvp.mkey_lease_timeout = 0;
694 		ibp->rvp.mkeyprot = 0;
695 	}
696 
697 	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
698 	    ibp->rvp.mkey == mkey)
699 		valid_mkey = 1;
700 
701 	/* Unset lease timeout on any valid Get/Set/TrapRepress */
702 	if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
703 	    (mad->method == IB_MGMT_METHOD_GET ||
704 	     mad->method == IB_MGMT_METHOD_SET ||
705 	     mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
706 		ibp->rvp.mkey_lease_timeout = 0;
707 
708 	if (!valid_mkey) {
709 		switch (mad->method) {
710 		case IB_MGMT_METHOD_GET:
711 			/* Bad mkey not a violation below level 2 */
712 			if (ibp->rvp.mkeyprot < 2)
713 				break;
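			/* fall through */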
714 		case IB_MGMT_METHOD_SET:
715 		case IB_MGMT_METHOD_TRAP_REPRESS:
716 			if (ibp->rvp.mkey_violations != 0xFFFF)
717 				++ibp->rvp.mkey_violations;
718 			if (!ibp->rvp.mkey_lease_timeout &&
719 			    ibp->rvp.mkey_lease_period)
720 				ibp->rvp.mkey_lease_timeout = jiffies +
721 					ibp->rvp.mkey_lease_period * HZ;
722 			/* Generate a trap notice. */
723 			bad_mkey(ibp, mad, mkey, dr_slid, return_path,
724 				 hop_cnt);
725 			ret = 1;
726 		}
727 	}
728 
729 	return ret;
730 }
731 
732 /*
733  * The SMA caches reads from LCB registers in case the LCB is unavailable.
734  * (The LCB is unavailable in certain link states, for example.)
735  */
736 struct lcb_datum {
737 	u32 off;
738 	u64 val;
739 };
740 
741 static struct lcb_datum lcb_cache[] = {
742 	{ DC_LCB_STS_ROUND_TRIP_LTP_CNT, 0 },
743 };
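/*
 * The cache currently holds only DC_LCB_STS_ROUND_TRIP_LTP_CNT; other LCB
 * CSRs can be cached by adding entries to the array above.
 */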
744 
745 static int write_lcb_cache(u32 off, u64 val)
746 {
747 	int i;
748 
749 	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
750 		if (lcb_cache[i].off == off) {
751 			lcb_cache[i].val = val;
752 			return 0;
753 		}
754 	}
755 
756 	pr_warn("%s bad offset 0x%x\n", __func__, off);
757 	return -1;
758 }
759 
760 static int read_lcb_cache(u32 off, u64 *val)
761 {
762 	int i;
763 
764 	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
765 		if (lcb_cache[i].off == off) {
766 			*val = lcb_cache[i].val;
767 			return 0;
768 		}
769 	}
770 
771 	pr_warn("%s bad offset 0x%x\n", __func__, off);
772 	return -1;
773 }
774 
775 void read_ltp_rtt(struct hfi1_devdata *dd)
776 {
777 	u64 reg;
778 
779 	if (read_lcb_csr(dd, DC_LCB_STS_ROUND_TRIP_LTP_CNT, &reg))
780 		dd_dev_err(dd, "%s: unable to read LTP RTT\n", __func__);
781 	else
782 		write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
783 }
784 
785 static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
786 				   struct ib_device *ibdev, u8 port,
787 				   u32 *resp_len, u32 max_len)
788 {
789 	int i;
790 	struct hfi1_devdata *dd;
791 	struct hfi1_pportdata *ppd;
792 	struct hfi1_ibport *ibp;
793 	struct opa_port_info *pi = (struct opa_port_info *)data;
794 	u8 mtu;
795 	u8 credit_rate;
796 	u8 is_beaconing_active;
797 	u32 state;
798 	u32 num_ports = OPA_AM_NPORT(am);
799 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
800 	u32 buffer_units;
801 	u64 tmp = 0;
802 
803 	if (num_ports != 1 || smp_length_check(sizeof(*pi), max_len)) {
804 		smp->status |= IB_SMP_INVALID_FIELD;
805 		return reply((struct ib_mad_hdr *)smp);
806 	}
807 
808 	dd = dd_from_ibdev(ibdev);
809 	/* IB numbers ports from 1, hw from 0 */
810 	ppd = dd->pport + (port - 1);
811 	ibp = &ppd->ibport_data;
812 
813 	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
814 	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
815 		smp->status |= IB_SMP_INVALID_FIELD;
816 		return reply((struct ib_mad_hdr *)smp);
817 	}
818 
819 	pi->lid = cpu_to_be32(ppd->lid);
820 
821 	/* Only return the mkey if the protection field allows it. */
822 	if (!(smp->method == IB_MGMT_METHOD_GET &&
823 	      ibp->rvp.mkey != smp->mkey &&
824 	      ibp->rvp.mkeyprot == 1))
825 		pi->mkey = ibp->rvp.mkey;
826 
827 	pi->subnet_prefix = ibp->rvp.gid_prefix;
828 	pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
829 	pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
830 	pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
831 	pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
832 	pi->sa_qp = cpu_to_be32(ppd->sa_qp);
833 
834 	pi->link_width.enabled = cpu_to_be16(ppd->link_width_enabled);
835 	pi->link_width.supported = cpu_to_be16(ppd->link_width_supported);
836 	pi->link_width.active = cpu_to_be16(ppd->link_width_active);
837 
838 	pi->link_width_downgrade.supported =
839 			cpu_to_be16(ppd->link_width_downgrade_supported);
840 	pi->link_width_downgrade.enabled =
841 			cpu_to_be16(ppd->link_width_downgrade_enabled);
842 	pi->link_width_downgrade.tx_active =
843 			cpu_to_be16(ppd->link_width_downgrade_tx_active);
844 	pi->link_width_downgrade.rx_active =
845 			cpu_to_be16(ppd->link_width_downgrade_rx_active);
846 
847 	pi->link_speed.supported = cpu_to_be16(ppd->link_speed_supported);
848 	pi->link_speed.active = cpu_to_be16(ppd->link_speed_active);
849 	pi->link_speed.enabled = cpu_to_be16(ppd->link_speed_enabled);
850 
851 	state = driver_lstate(ppd);
852 
853 	if (start_of_sm_config && (state == IB_PORT_INIT))
854 		ppd->is_sm_config_started = 1;
855 
856 	pi->port_phys_conf = (ppd->port_type & 0xf);
857 
858 	pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
859 	pi->port_states.ledenable_offlinereason |=
860 		ppd->is_sm_config_started << 5;
861 	/*
862 	 * This pairs with the memory barrier in hfi1_start_led_override to
863 	 * ensure that we read the correct state of LED beaconing represented
864 	 * by led_override_timer_active
865 	 */
866 	smp_rmb();
867 	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
868 	pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
869 	pi->port_states.ledenable_offlinereason |=
870 		ppd->offline_disabled_reason;
871 
872 	pi->port_states.portphysstate_portstate =
873 		(driver_pstate(ppd) << 4) | state;
874 
875 	pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
876 
877 	memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
878 	for (i = 0; i < ppd->vls_supported; i++) {
879 		mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
880 		if ((i % 2) == 0)
881 			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
882 		else
883 			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
884 	}
885 	/* don't forget VL 15 */
886 	mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
887 	pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
888 	pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
889 	pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
890 	pi->partenforce_filterraw |=
891 		(ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
892 	if (ppd->part_enforce & HFI1_PART_ENFORCE_IN)
893 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
894 	if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
895 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
896 	pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
897 	/* P_KeyViolations are counted by hardware. */
898 	pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
899 	pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
900 
901 	pi->vl.cap = ppd->vls_supported;
902 	pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
903 	pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
904 	pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);
905 
906 	pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;
907 
908 	pi->port_link_mode  = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
909 					  OPA_PORT_LINK_MODE_OPA << 5 |
910 					  OPA_PORT_LINK_MODE_OPA);
911 
912 	pi->port_ltp_crc_mode = cpu_to_be16(ppd->port_ltp_crc_mode);
913 
914 	pi->port_mode = cpu_to_be16(
915 				ppd->is_active_optimize_enabled ?
916 					OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);
917 
918 	pi->port_packet_format.supported =
919 		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
920 			    OPA_PORT_PACKET_FORMAT_16B);
921 	pi->port_packet_format.enabled =
922 		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
923 			    OPA_PORT_PACKET_FORMAT_16B);
924 
925 	/* flit_control.interleave is (OPA V1, version .76):
926 	 * bits		use
927 	 * ----		---
928 	 * 2		res
929 	 * 2		DistanceSupported
930 	 * 2		DistanceEnabled
931 	 * 5		MaxNestLevelTxEnabled
932 	 * 5		MaxNestLevelRxSupported
933 	 *
934 	 * HFI supports only "distance mode 1" (see OPA V1, version .76,
935 	 * section 9.6.2), so set DistanceSupported, DistanceEnabled
936 	 * to 0x1.
937 	 */
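	/*
	 * Sketch of the encoding: with the bit layout above (MSB first),
	 * DistanceSupported occupies bits 13:12 and DistanceEnabled bits
	 * 11:10, so setting both to 0x1 gives (1 << 12) | (1 << 10) = 0x1400.
	 */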
938 	pi->flit_control.interleave = cpu_to_be16(0x1400);
939 
940 	pi->link_down_reason = ppd->local_link_down_reason.sma;
941 	pi->neigh_link_down_reason = ppd->neigh_link_down_reason.sma;
942 	pi->port_error_action = cpu_to_be32(ppd->port_error_action);
943 	pi->mtucap = mtu_to_enum(hfi1_max_mtu, IB_MTU_4096);
944 
945 	/* 32.768 usec. response time (guessing) */
946 	pi->resptimevalue = 3;
947 
948 	pi->local_port_num = port;
949 
950 	/* buffer info for FM */
951 	pi->overall_buffer_space = cpu_to_be16(dd->link_credits);
952 
953 	pi->neigh_node_guid = cpu_to_be64(ppd->neighbor_guid);
954 	pi->neigh_port_num = ppd->neighbor_port_number;
955 	pi->port_neigh_mode =
956 		(ppd->neighbor_type & OPA_PI_MASK_NEIGH_NODE_TYPE) |
957 		(ppd->mgmt_allowed ? OPA_PI_MASK_NEIGH_MGMT_ALLOWED : 0) |
958 		(ppd->neighbor_fm_security ?
959 			OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS : 0);
960 
961 	/* HFIs shall always return VL15 credits to their
962 	 * neighbor in a timely manner, without any credit return pacing.
963 	 */
964 	credit_rate = 0;
965 	buffer_units  = (dd->vau) & OPA_PI_MASK_BUF_UNIT_BUF_ALLOC;
966 	buffer_units |= (dd->vcu << 3) & OPA_PI_MASK_BUF_UNIT_CREDIT_ACK;
967 	buffer_units |= (credit_rate << 6) &
968 				OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE;
969 	buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
970 	pi->buffer_units = cpu_to_be32(buffer_units);
971 
972 	pi->opa_cap_mask = cpu_to_be16(ibp->rvp.port_cap3_flags);
973 	pi->collectivemask_multicastmask = ((OPA_COLLECTIVE_NR & 0x7)
974 					    << 3 | (OPA_MCAST_NR & 0x7));
975 
976 	/* HFI supports a replay buffer 128 LTPs in size */
977 	pi->replay_depth.buffer = 0x80;
978 	/* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
979 	read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);
980 
981 	/*
982 	 * this counter is 16 bits wide, but the replay_depth.wire
983 	 * variable is only 8 bits
984 	 */
985 	if (tmp > 0xff)
986 		tmp = 0xff;
987 	pi->replay_depth.wire = tmp;
988 
989 	if (resp_len)
990 		*resp_len += sizeof(struct opa_port_info);
991 
992 	return reply((struct ib_mad_hdr *)smp);
993 }
994 
995 /**
996  * get_pkeys - return the PKEY table
997  * @dd: the hfi1_ib device
998  * @port: the IB port number
999  * @pkeys: the pkey table is placed here
1000  */
1001 static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
1002 {
1003 	struct hfi1_pportdata *ppd = dd->pport + port - 1;
1004 
1005 	memcpy(pkeys, ppd->pkeys, sizeof(ppd->pkeys));
1006 
1007 	return 0;
1008 }
1009 
1010 static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
1011 				    struct ib_device *ibdev, u8 port,
1012 				    u32 *resp_len, u32 max_len)
1013 {
1014 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1015 	u32 n_blocks_req = OPA_AM_NBLK(am);
1016 	u32 start_block = am & 0x7ff;
1017 	__be16 *p;
1018 	u16 *q;
1019 	int i;
1020 	u16 n_blocks_avail;
1021 	unsigned npkeys = hfi1_get_npkeys(dd);
1022 	size_t size;
1023 
1024 	if (n_blocks_req == 0) {
1025 		pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
1026 			port, start_block, n_blocks_req);
1027 		smp->status |= IB_SMP_INVALID_FIELD;
1028 		return reply((struct ib_mad_hdr *)smp);
1029 	}
1030 
1031 	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
1032 
1033 	size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
1034 
1035 	if (smp_length_check(size, max_len)) {
1036 		smp->status |= IB_SMP_INVALID_FIELD;
1037 		return reply((struct ib_mad_hdr *)smp);
1038 	}
1039 
1040 	if (start_block + n_blocks_req > n_blocks_avail ||
1041 	    n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
1042 		pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
1043 			start_block, n_blocks_req,
1044 			n_blocks_avail,
1045 			OPA_NUM_PKEY_BLOCKS_PER_SMP);
1046 		smp->status |= IB_SMP_INVALID_FIELD;
1047 		return reply((struct ib_mad_hdr *)smp);
1048 	}
1049 
1050 	p = (__be16 *)data;
1051 	q = (u16 *)data;
1052 	/* get the real pkeys if we are requesting the first block */
1053 	if (start_block == 0) {
1054 		get_pkeys(dd, port, q);
1055 		for (i = 0; i < npkeys; i++)
1056 			p[i] = cpu_to_be16(q[i]);
1057 		if (resp_len)
1058 			*resp_len += size;
1059 	} else {
1060 		smp->status |= IB_SMP_INVALID_FIELD;
1061 	}
1062 	return reply((struct ib_mad_hdr *)smp);
1063 }
1064 
1065 enum {
1066 	HFI_TRANSITION_DISALLOWED,
1067 	HFI_TRANSITION_IGNORED,
1068 	HFI_TRANSITION_ALLOWED,
1069 	HFI_TRANSITION_UNDEFINED,
1070 };
1071 
1072 /*
1073  * Use shortened names to improve readability of
1074  * {logical,physical}_state_transitions
1075  */
1076 enum {
1077 	__D = HFI_TRANSITION_DISALLOWED,
1078 	__I = HFI_TRANSITION_IGNORED,
1079 	__A = HFI_TRANSITION_ALLOWED,
1080 	__U = HFI_TRANSITION_UNDEFINED,
1081 };
1082 
1083 /*
1084  * IB_PORTPHYSSTATE_POLLING (2) through OPA_PORTPHYSSTATE_MAX (11) are
1085  * represented in physical_state_transitions.
1086  */
1087 #define __N_PHYSTATES (OPA_PORTPHYSSTATE_MAX - IB_PORTPHYSSTATE_POLLING + 1)
1088 
1089 /*
1090  * Within physical_state_transitions, rows represent "old" states,
1091  * columns "new" states, and physical_state_transitions.allowed[old][new]
1092  * indicates if the transition from old state to new state is legal (see
1093  * OPAg1v1, Table 6-4).
1094  */
1095 static const struct {
1096 	u8 allowed[__N_PHYSTATES][__N_PHYSTATES];
1097 } physical_state_transitions = {
1098 	{
1099 		/* 2    3    4    5    6    7    8    9   10   11 */
1100 	/* 2 */	{ __A, __A, __D, __D, __D, __D, __D, __D, __D, __D },
1101 	/* 3 */	{ __A, __I, __D, __D, __D, __D, __D, __D, __D, __A },
1102 	/* 4 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
1103 	/* 5 */	{ __A, __A, __D, __I, __D, __D, __D, __D, __D, __D },
1104 	/* 6 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
1105 	/* 7 */	{ __D, __A, __D, __D, __D, __I, __D, __D, __D, __D },
1106 	/* 8 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
1107 	/* 9 */	{ __I, __A, __D, __D, __D, __D, __D, __I, __D, __D },
1108 	/*10 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
1109 	/*11 */	{ __D, __A, __D, __D, __D, __D, __D, __D, __D, __I },
1110 	}
1111 };
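/*
 * Index example (a sketch): a request to move from physical state 3 to
 * state 2 is checked as allowed[3 - IB_PORTPHYSSTATE_POLLING]
 * [2 - IB_PORTPHYSSTATE_POLLING] == allowed[1][0], which is __A above.
 */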
1112 
1113 /*
1114  * IB_PORT_DOWN (1) through IB_PORT_ACTIVE_DEFER (5) are represented
1115  * in logical_state_transitions.
1116  */
1117 
1118 #define __N_LOGICAL_STATES (IB_PORT_ACTIVE_DEFER - IB_PORT_DOWN + 1)
1119 
1120 /*
1121  * Within logical_state_transitions rows represent "old" states,
1122  * columns "new" states, and logical_state_transitions.allowed[old][new]
1123  * indicates if the transition from old state to new state is legal (see
1124  * OPAg1v1, Table 9-12).
1125  */
1126 static const struct {
1127 	u8 allowed[__N_LOGICAL_STATES][__N_LOGICAL_STATES];
1128 } logical_state_transitions = {
1129 	{
1130 		/* 1    2    3    4    5 */
1131 	/* 1 */	{ __I, __D, __D, __D, __U},
1132 	/* 2 */	{ __D, __I, __A, __D, __U},
1133 	/* 3 */	{ __D, __D, __I, __A, __U},
1134 	/* 4 */	{ __D, __D, __I, __I, __U},
1135 	/* 5 */	{ __U, __U, __U, __U, __U},
1136 	}
1137 };
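/*
 * Index example (a sketch): a transition from IB_PORT_INIT (2) to
 * IB_PORT_ARMED (3) is checked as allowed[2 - IB_PORT_DOWN]
 * [3 - IB_PORT_DOWN] == allowed[1][2], which is __A above.
 */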
1138 
1139 static int logical_transition_allowed(int old, int new)
1140 {
1141 	if (old < IB_PORT_NOP || old > IB_PORT_ACTIVE_DEFER ||
1142 	    new < IB_PORT_NOP || new > IB_PORT_ACTIVE_DEFER) {
1143 		pr_warn("invalid logical state(s) (old %d new %d)\n",
1144 			old, new);
1145 		return HFI_TRANSITION_UNDEFINED;
1146 	}
1147 
1148 	if (new == IB_PORT_NOP)
1149 		return HFI_TRANSITION_ALLOWED; /* always allowed */
1150 
1151 	/* adjust states for indexing into logical_state_transitions */
1152 	old -= IB_PORT_DOWN;
1153 	new -= IB_PORT_DOWN;
1154 
1155 	if (old < 0 || new < 0)
1156 		return HFI_TRANSITION_UNDEFINED;
1157 	return logical_state_transitions.allowed[old][new];
1158 }
1159 
1160 static int physical_transition_allowed(int old, int new)
1161 {
1162 	if (old < IB_PORTPHYSSTATE_NOP || old > OPA_PORTPHYSSTATE_MAX ||
1163 	    new < IB_PORTPHYSSTATE_NOP || new > OPA_PORTPHYSSTATE_MAX) {
1164 		pr_warn("invalid physical state(s) (old %d new %d)\n",
1165 			old, new);
1166 		return HFI_TRANSITION_UNDEFINED;
1167 	}
1168 
1169 	if (new == IB_PORTPHYSSTATE_NOP)
1170 		return HFI_TRANSITION_ALLOWED; /* always allowed */
1171 
1172 	/* adjust states for indexing into physical_state_transitions */
1173 	old -= IB_PORTPHYSSTATE_POLLING;
1174 	new -= IB_PORTPHYSSTATE_POLLING;
1175 
1176 	if (old < 0 || new < 0)
1177 		return HFI_TRANSITION_UNDEFINED;
1178 	return physical_state_transitions.allowed[old][new];
1179 }
1180 
1181 static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
1182 					  u32 logical_new, u32 physical_new)
1183 {
1184 	u32 physical_old = driver_pstate(ppd);
1185 	u32 logical_old = driver_lstate(ppd);
1186 	int ret, logical_allowed, physical_allowed;
1187 
1188 	ret = logical_transition_allowed(logical_old, logical_new);
1189 	logical_allowed = ret;
1190 
1191 	if (ret == HFI_TRANSITION_DISALLOWED ||
1192 	    ret == HFI_TRANSITION_UNDEFINED) {
1193 		pr_warn("invalid logical state transition %s -> %s\n",
1194 			opa_lstate_name(logical_old),
1195 			opa_lstate_name(logical_new));
1196 		return ret;
1197 	}
1198 
1199 	ret = physical_transition_allowed(physical_old, physical_new);
1200 	physical_allowed = ret;
1201 
1202 	if (ret == HFI_TRANSITION_DISALLOWED ||
1203 	    ret == HFI_TRANSITION_UNDEFINED) {
1204 		pr_warn("invalid physical state transition %s -> %s\n",
1205 			opa_pstate_name(physical_old),
1206 			opa_pstate_name(physical_new));
1207 		return ret;
1208 	}
1209 
1210 	if (logical_allowed == HFI_TRANSITION_IGNORED &&
1211 	    physical_allowed == HFI_TRANSITION_IGNORED)
1212 		return HFI_TRANSITION_IGNORED;
1213 
1214 	/*
1215 	 * A change request of Physical Port State from
1216 	 * 'Offline' to 'Polling' should be ignored.
1217 	 */
1218 	if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
1219 	    (physical_new == IB_PORTPHYSSTATE_POLLING))
1220 		return HFI_TRANSITION_IGNORED;
1221 
1222 	/*
1223 	 * Either physical_allowed or logical_allowed is
1224 	 * HFI_TRANSITION_ALLOWED.
1225 	 */
1226 	return HFI_TRANSITION_ALLOWED;
1227 }
1228 
1229 static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
1230 			   u32 logical_state, u32 phys_state,
1231 			   int suppress_idle_sma)
1232 {
1233 	struct hfi1_devdata *dd = ppd->dd;
1234 	u32 link_state;
1235 	int ret;
1236 
1237 	ret = port_states_transition_allowed(ppd, logical_state, phys_state);
1238 	if (ret == HFI_TRANSITION_DISALLOWED ||
1239 	    ret == HFI_TRANSITION_UNDEFINED) {
1240 		/* error message emitted above */
1241 		smp->status |= IB_SMP_INVALID_FIELD;
1242 		return 0;
1243 	}
1244 
1245 	if (ret == HFI_TRANSITION_IGNORED)
1246 		return 0;
1247 
1248 	if ((phys_state != IB_PORTPHYSSTATE_NOP) &&
1249 	    !(logical_state == IB_PORT_DOWN ||
1250 	      logical_state == IB_PORT_NOP)) {
1251 		pr_warn("SubnSet(OPA_PortInfo) port state invalid: logical_state 0x%x physical_state 0x%x\n",
1252 			logical_state, phys_state);
1253 		smp->status |= IB_SMP_INVALID_FIELD;
1254 	}
1255 
1256 	/*
1257 	 * Logical state changes are summarized in OPAv1g1 spec.,
1258 	 * Table 9-12; physical state changes are summarized in
1259 	 * OPAv1g1 spec., Table 6.4.
1260 	 */
1261 	switch (logical_state) {
1262 	case IB_PORT_NOP:
1263 		if (phys_state == IB_PORTPHYSSTATE_NOP)
1264 			break;
1265 		/* FALLTHROUGH */
1266 	case IB_PORT_DOWN:
1267 		if (phys_state == IB_PORTPHYSSTATE_NOP) {
1268 			link_state = HLS_DN_DOWNDEF;
1269 		} else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
1270 			link_state = HLS_DN_POLL;
1271 			set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
1272 					     0, OPA_LINKDOWN_REASON_FM_BOUNCE);
1273 		} else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
1274 			link_state = HLS_DN_DISABLE;
1275 		} else {
1276 			pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
1277 				phys_state);
1278 			smp->status |= IB_SMP_INVALID_FIELD;
1279 			break;
1280 		}
1281 
1282 		if ((link_state == HLS_DN_POLL ||
1283 		     link_state == HLS_DN_DOWNDEF)) {
1284 			/*
1285 			 * Going to poll.  No matter what the current state,
1286 			 * always move offline first, then tune and start the
1287 			 * link.  This correctly handles a FM link bounce and
1288 			 * a link enable.  Going offline is a no-op if already
1289 			 * offline.
1290 			 */
1291 			set_link_state(ppd, HLS_DN_OFFLINE);
1292 			start_link(ppd);
1293 		} else {
1294 			set_link_state(ppd, link_state);
1295 		}
1296 		if (link_state == HLS_DN_DISABLE &&
1297 		    (ppd->offline_disabled_reason >
1298 		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
1299 		     ppd->offline_disabled_reason ==
1300 		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
1301 			ppd->offline_disabled_reason =
1302 			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
1303 		/*
1304 		 * Don't send a reply if the response would be sent
1305 		 * through the disabled port.
1306 		 */
1307 		if (link_state == HLS_DN_DISABLE && smp->hop_cnt)
1308 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
1309 		break;
1310 	case IB_PORT_ARMED:
1311 		ret = set_link_state(ppd, HLS_UP_ARMED);
1312 		if ((ret == 0) && (suppress_idle_sma == 0))
1313 			send_idle_sma(dd, SMA_IDLE_ARM);
1314 		break;
1315 	case IB_PORT_ACTIVE:
1316 		if (ppd->neighbor_normal) {
1317 			ret = set_link_state(ppd, HLS_UP_ACTIVE);
1318 			if (ret == 0)
1319 				send_idle_sma(dd, SMA_IDLE_ACTIVE);
1320 		} else {
1321 			pr_warn("SubnSet(OPA_PortInfo) Cannot move to Active with NeighborNormal 0\n");
1322 			smp->status |= IB_SMP_INVALID_FIELD;
1323 		}
1324 		break;
1325 	default:
1326 		pr_warn("SubnSet(OPA_PortInfo) invalid logical state 0x%x\n",
1327 			logical_state);
1328 		smp->status |= IB_SMP_INVALID_FIELD;
1329 	}
1330 
1331 	return 0;
1332 }
1333 
1334 /**
1335  * __subn_set_opa_portinfo - set port information
1336  * @smp: the incoming SM packet
1337  * @ibdev: the infiniband device
1338  * @port: the port on the device
1339  *
1340  */
1341 static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
1342 				   struct ib_device *ibdev, u8 port,
1343 				   u32 *resp_len, u32 max_len)
1344 {
1345 	struct opa_port_info *pi = (struct opa_port_info *)data;
1346 	struct ib_event event;
1347 	struct hfi1_devdata *dd;
1348 	struct hfi1_pportdata *ppd;
1349 	struct hfi1_ibport *ibp;
1350 	u8 clientrereg;
1351 	unsigned long flags;
1352 	u32 smlid;
1353 	u32 lid;
1354 	u8 ls_old, ls_new, ps_new;
1355 	u8 vls;
1356 	u8 msl;
1357 	u8 crc_enabled;
1358 	u16 lse, lwe, mtu;
1359 	u32 num_ports = OPA_AM_NPORT(am);
1360 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1361 	int ret, i, invalid = 0, call_set_mtu = 0;
1362 	int call_link_downgrade_policy = 0;
1363 
1364 	if (num_ports != 1 ||
1365 	    smp_length_check(sizeof(*pi), max_len)) {
1366 		smp->status |= IB_SMP_INVALID_FIELD;
1367 		return reply((struct ib_mad_hdr *)smp);
1368 	}
1369 
1370 	lid = be32_to_cpu(pi->lid);
1371 	if (lid & 0xFF000000) {
1372 		pr_warn("OPA_PortInfo lid out of range: %X\n", lid);
1373 		smp->status |= IB_SMP_INVALID_FIELD;
1374 		goto get_only;
1375 	}
1376 
1377 
1378 	smlid = be32_to_cpu(pi->sm_lid);
1379 	if (smlid & 0xFF000000) {
1380 		pr_warn("OPA_PortInfo SM lid out of range: %X\n", smlid);
1381 		smp->status |= IB_SMP_INVALID_FIELD;
1382 		goto get_only;
1383 	}
1384 
1385 	clientrereg = (pi->clientrereg_subnettimeout &
1386 			OPA_PI_MASK_CLIENT_REREGISTER);
1387 
1388 	dd = dd_from_ibdev(ibdev);
1389 	/* IB numbers ports from 1, hw from 0 */
1390 	ppd = dd->pport + (port - 1);
1391 	ibp = &ppd->ibport_data;
1392 	event.device = ibdev;
1393 	event.element.port_num = port;
1394 
1395 	ls_old = driver_lstate(ppd);
1396 
1397 	ibp->rvp.mkey = pi->mkey;
1398 	if (ibp->rvp.gid_prefix != pi->subnet_prefix) {
1399 		ibp->rvp.gid_prefix = pi->subnet_prefix;
1400 		event.event = IB_EVENT_GID_CHANGE;
1401 		ib_dispatch_event(&event);
1402 	}
1403 	ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
1404 
1405 	/* Must be a valid unicast LID address. */
1406 	if ((lid == 0 && ls_old > IB_PORT_INIT) ||
1407 	     (hfi1_is_16B_mcast(lid))) {
1408 		smp->status |= IB_SMP_INVALID_FIELD;
1409 		pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
1410 			lid);
1411 	} else if (ppd->lid != lid ||
1412 		 ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC)) {
1413 		if (ppd->lid != lid)
1414 			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LID_CHANGE_BIT);
1415 		if (ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC))
1416 			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LMC_CHANGE_BIT);
1417 		hfi1_set_lid(ppd, lid, pi->mkeyprotect_lmc & OPA_PI_MASK_LMC);
1418 		event.event = IB_EVENT_LID_CHANGE;
1419 		ib_dispatch_event(&event);
1420 
1421 		if (HFI1_PORT_GUID_INDEX + 1 < HFI1_GUIDS_PER_PORT) {
1422 			/* Manufacture GID from LID to support extended
1423 			 * addresses
1424 			 */
1425 			ppd->guids[HFI1_PORT_GUID_INDEX + 1] =
1426 				be64_to_cpu(OPA_MAKE_ID(lid));
1427 			event.event = IB_EVENT_GID_CHANGE;
1428 			ib_dispatch_event(&event);
1429 		}
1430 	}
1431 
1432 	msl = pi->smsl & OPA_PI_MASK_SMSL;
1433 	if (pi->partenforce_filterraw & OPA_PI_MASK_LINKINIT_REASON)
1434 		ppd->linkinit_reason =
1435 			(pi->partenforce_filterraw &
1436 			 OPA_PI_MASK_LINKINIT_REASON);
1437 
1438 	/* Must be a valid unicast LID address. */
1439 	if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
1440 	     (hfi1_is_16B_mcast(smlid))) {
1441 		smp->status |= IB_SMP_INVALID_FIELD;
1442 		pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
1443 	} else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
1444 		pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
1445 		spin_lock_irqsave(&ibp->rvp.lock, flags);
1446 		if (ibp->rvp.sm_ah) {
1447 			if (smlid != ibp->rvp.sm_lid)
1448 				hfi1_modify_qp0_ah(ibp, ibp->rvp.sm_ah, smlid);
1449 			if (msl != ibp->rvp.sm_sl)
1450 				rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl);
1451 		}
1452 		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
1453 		if (smlid != ibp->rvp.sm_lid)
1454 			ibp->rvp.sm_lid = smlid;
1455 		if (msl != ibp->rvp.sm_sl)
1456 			ibp->rvp.sm_sl = msl;
1457 		event.event = IB_EVENT_SM_CHANGE;
1458 		ib_dispatch_event(&event);
1459 	}
1460 
1461 	if (pi->link_down_reason == 0) {
1462 		ppd->local_link_down_reason.sma = 0;
1463 		ppd->local_link_down_reason.latest = 0;
1464 	}
1465 
1466 	if (pi->neigh_link_down_reason == 0) {
1467 		ppd->neigh_link_down_reason.sma = 0;
1468 		ppd->neigh_link_down_reason.latest = 0;
1469 	}
1470 
1471 	ppd->sm_trap_qp = be32_to_cpu(pi->sm_trap_qp);
1472 	ppd->sa_qp = be32_to_cpu(pi->sa_qp);
1473 
1474 	ppd->port_error_action = be32_to_cpu(pi->port_error_action);
1475 	lwe = be16_to_cpu(pi->link_width.enabled);
1476 	if (lwe) {
1477 		if (lwe == OPA_LINK_WIDTH_RESET ||
1478 		    lwe == OPA_LINK_WIDTH_RESET_OLD)
1479 			set_link_width_enabled(ppd, ppd->link_width_supported);
1480 		else if ((lwe & ~ppd->link_width_supported) == 0)
1481 			set_link_width_enabled(ppd, lwe);
1482 		else
1483 			smp->status |= IB_SMP_INVALID_FIELD;
1484 	}
1485 	lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
1486 	/* LWD.E is always applied - 0 means "disabled" */
1487 	if (lwe == OPA_LINK_WIDTH_RESET ||
1488 	    lwe == OPA_LINK_WIDTH_RESET_OLD) {
1489 		set_link_width_downgrade_enabled(ppd,
1490 						 ppd->
1491 						 link_width_downgrade_supported
1492 						 );
1493 	} else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
1494 		/* only set and apply if something changed */
1495 		if (lwe != ppd->link_width_downgrade_enabled) {
1496 			set_link_width_downgrade_enabled(ppd, lwe);
1497 			call_link_downgrade_policy = 1;
1498 		}
1499 	} else {
1500 		smp->status |= IB_SMP_INVALID_FIELD;
1501 	}
1502 	lse = be16_to_cpu(pi->link_speed.enabled);
1503 	if (lse) {
1504 		if (lse & be16_to_cpu(pi->link_speed.supported))
1505 			set_link_speed_enabled(ppd, lse);
1506 		else
1507 			smp->status |= IB_SMP_INVALID_FIELD;
1508 	}
1509 
1510 	ibp->rvp.mkeyprot =
1511 		(pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
1512 	ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
1513 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
1514 				    ibp->rvp.vl_high_limit);
1515 
1516 	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
1517 	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
1518 		smp->status |= IB_SMP_INVALID_FIELD;
1519 		return reply((struct ib_mad_hdr *)smp);
1520 	}
1521 	for (i = 0; i < ppd->vls_supported; i++) {
1522 		if ((i % 2) == 0)
1523 			mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
1524 					   4) & 0xF);
1525 		else
1526 			mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
1527 					  0xF);
1528 		if (mtu == 0xffff) {
1529 			pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
1530 				mtu,
1531 				(pi->neigh_mtu.pvlx_to_mtu[0] >> 4) & 0xF);
1532 			smp->status |= IB_SMP_INVALID_FIELD;
1533 			mtu = hfi1_max_mtu; /* use a valid MTU */
1534 		}
1535 		if (dd->vld[i].mtu != mtu) {
1536 			dd_dev_info(dd,
1537 				    "MTU change on vl %d from %d to %d\n",
1538 				    i, dd->vld[i].mtu, mtu);
1539 			dd->vld[i].mtu = mtu;
1540 			call_set_mtu++;
1541 		}
1542 	}
1543 	/* As per the OPAv1 spec, VL15 must be supported and configured
1544 	 * for operation with an MTU of 2048 or larger.
1545 	 */
1546 	mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
1547 	if (mtu < 2048 || mtu == 0xffff)
1548 		mtu = 2048;
1549 	if (dd->vld[15].mtu != mtu) {
1550 		dd_dev_info(dd,
1551 			    "MTU change on vl 15 from %d to %d\n",
1552 			    dd->vld[15].mtu, mtu);
1553 		dd->vld[15].mtu = mtu;
1554 		call_set_mtu++;
1555 	}
1556 	if (call_set_mtu)
1557 		set_mtu(ppd);
1558 
1559 	/* Set operational VLs */
1560 	vls = pi->operational_vls & OPA_PI_MASK_OPERATIONAL_VL;
1561 	if (vls) {
1562 		if (vls > ppd->vls_supported) {
1563 			pr_warn("SubnSet(OPA_PortInfo) VL's supported invalid %d\n",
1564 				pi->operational_vls);
1565 			smp->status |= IB_SMP_INVALID_FIELD;
1566 		} else {
1567 			if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
1568 					    vls) == -EINVAL)
1569 				smp->status |= IB_SMP_INVALID_FIELD;
1570 		}
1571 	}
1572 
1573 	if (pi->mkey_violations == 0)
1574 		ibp->rvp.mkey_violations = 0;
1575 
1576 	if (pi->pkey_violations == 0)
1577 		ibp->rvp.pkey_violations = 0;
1578 
1579 	if (pi->qkey_violations == 0)
1580 		ibp->rvp.qkey_violations = 0;
1581 
1582 	ibp->rvp.subnet_timeout =
1583 		pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
1584 
1585 	crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
1586 	crc_enabled >>= 4;
1587 	crc_enabled &= 0xf;
1588 
1589 	if (crc_enabled != 0)
1590 		ppd->port_crc_mode_enabled = port_ltp_to_cap(crc_enabled);
1591 
1592 	ppd->is_active_optimize_enabled =
1593 			!!(be16_to_cpu(pi->port_mode)
1594 					& OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE);
1595 
1596 	ls_new = pi->port_states.portphysstate_portstate &
1597 			OPA_PI_MASK_PORT_STATE;
1598 	ps_new = (pi->port_states.portphysstate_portstate &
1599 			OPA_PI_MASK_PORT_PHYSICAL_STATE) >> 4;
1600 
1601 	if (ls_old == IB_PORT_INIT) {
1602 		if (start_of_sm_config) {
1603 			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1604 				ppd->is_sm_config_started = 1;
1605 		} else if (ls_new == IB_PORT_ARMED) {
1606 			if (ppd->is_sm_config_started == 0)
1607 				invalid = 1;
1608 		}
1609 	}
1610 
1611 	/* Handle CLIENT_REREGISTER event b/c SM asked us for it */
1612 	if (clientrereg) {
1613 		event.event = IB_EVENT_CLIENT_REREGISTER;
1614 		ib_dispatch_event(&event);
1615 	}
1616 
1617 	/*
1618 	 * Do the port state change now that the other link parameters
1619 	 * have been set.
1620 	 * Changing the port physical state only makes sense if the link
1621 	 * is down or is being set to down.
1622 	 */
1623 
1624 	ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
1625 	if (ret)
1626 		return ret;
1627 
1628 	ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
1629 				      max_len);
1630 
1631 	/* restore re-reg bit per o14-12.2.1 */
1632 	pi->clientrereg_subnettimeout |= clientrereg;
1633 
1634 	/*
1635 	 * Apply the new link downgrade policy.  This may result in a link
1636 	 * bounce.  Do this after everything else so things are settled.
1637 	 * Possible problem: if setting the port state above fails, then
1638 	 * the policy change is not applied.
1639 	 */
1640 	if (call_link_downgrade_policy)
1641 		apply_link_downgrade_policy(ppd, 0);
1642 
1643 	return ret;
1644 
1645 get_only:
1646 	return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
1647 				       max_len);
1648 }
1649 
1650 /**
1651  * set_pkeys - set the PKEY table for ctxt 0
1652  * @dd: the hfi1_ib device
1653  * @port: the IB port number
1654  * @pkeys: the PKEY table
1655  */
1656 static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
1657 {
1658 	struct hfi1_pportdata *ppd;
1659 	int i;
1660 	int changed = 0;
1661 	int update_includes_mgmt_partition = 0;
1662 
1663 	/*
1664 	 * IB port one/two always maps to context zero/one, which is always
1665 	 * a kernel context, so no locking is needed.
1666 	 * If we get here with ppd set up, there is no need to check
1667 	 * that rcd is valid.
1668 	 */
1669 	ppd = dd->pport + (port - 1);
1670 	/*
1671 	 * If the update does not include the management pkey, don't do it.
1672 	 */
1673 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1674 		if (pkeys[i] == LIM_MGMT_P_KEY) {
1675 			update_includes_mgmt_partition = 1;
1676 			break;
1677 		}
1678 	}
1679 
1680 	if (!update_includes_mgmt_partition)
1681 		return 1;
1682 
1683 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1684 		u16 key = pkeys[i];
1685 		u16 okey = ppd->pkeys[i];
1686 
1687 		if (key == okey)
1688 			continue;
1689 		/*
1690 		 * The SM gives us the complete PKey table. We have
1691 		 * to ensure that we put the PKeys in the matching
1692 		 * slots.
1693 		 */
1694 		ppd->pkeys[i] = key;
1695 		changed = 1;
1696 	}
1697 
1698 	if (changed) {
1699 		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
1700 		hfi1_event_pkey_change(dd, port);
1701 	}
1702 
1703 	return 0;
1704 }
1705 
1706 static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
1707 				    struct ib_device *ibdev, u8 port,
1708 				    u32 *resp_len, u32 max_len)
1709 {
1710 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1711 	u32 n_blocks_sent = OPA_AM_NBLK(am);
1712 	u32 start_block = am & 0x7ff;
1713 	u16 *p = (u16 *)data;
1714 	__be16 *q = (__be16 *)data;
1715 	int i;
1716 	u16 n_blocks_avail;
1717 	unsigned npkeys = hfi1_get_npkeys(dd);
1718 	u32 size = 0;
1719 
1720 	if (n_blocks_sent == 0) {
1721 		pr_warn("OPA Set PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
1722 			port, start_block, n_blocks_sent);
1723 		smp->status |= IB_SMP_INVALID_FIELD;
1724 		return reply((struct ib_mad_hdr *)smp);
1725 	}
1726 
1727 	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
1728 
1729 	size = sizeof(u16) * (n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE);
1730 
1731 	if (smp_length_check(size, max_len)) {
1732 		smp->status |= IB_SMP_INVALID_FIELD;
1733 		return reply((struct ib_mad_hdr *)smp);
1734 	}
1735 
1736 	if (start_block + n_blocks_sent > n_blocks_avail ||
1737 	    n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
1738 		pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
1739 			start_block, n_blocks_sent, n_blocks_avail,
1740 			OPA_NUM_PKEY_BLOCKS_PER_SMP);
1741 		smp->status |= IB_SMP_INVALID_FIELD;
1742 		return reply((struct ib_mad_hdr *)smp);
1743 	}
1744 
1745 	for (i = 0; i < n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE; i++)
1746 		p[i] = be16_to_cpu(q[i]);
1747 
1748 	if (start_block == 0 && set_pkeys(dd, port, p) != 0) {
1749 		smp->status |= IB_SMP_INVALID_FIELD;
1750 		return reply((struct ib_mad_hdr *)smp);
1751 	}
1752 
1753 	return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len,
1754 					max_len);
1755 }
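
/*
 * Illustrative sketch, not part of the driver: the PKey block arithmetic
 * used above.  Each block carries OPA_PARTITION_TABLE_BLK_SIZE 16-bit
 * entries, so a request for n blocks must supply
 * n * OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16) bytes.  Any partial
 * trailing block of the port's pkey table is still advertised as a whole
 * block, which is why n_blocks_avail above rounds up by one.
 */
static inline u32 example_pkey_request_bytes(u32 n_blocks_sent)
{
	return sizeof(u16) * n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE;
}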
1756 
1757 #define ILLEGAL_VL 12
1758 /*
1759  * filter_sc2vlt rewrites any SC entry that maps to VL15 as ILLEGAL_VL
1760  * (except for SC15, which must map to VL15). If we don't remap things
1761  * this way it is possible for VL15 counters to increment when we try
1762  * to send on an SC which is mapped to an invalid VL.
1763  * When getting the table convert ILLEGAL_VL back to VL15.
1764  */
1765 static void filter_sc2vlt(void *data, bool set)
1766 {
1767 	int i;
1768 	u8 *pd = data;
1769 
1770 	for (i = 0; i < OPA_MAX_SCS; i++) {
1771 		if (i == 15)
1772 			continue;
1773 
1774 		if (set) {
1775 			if ((pd[i] & 0x1f) == 0xf)
1776 				pd[i] = ILLEGAL_VL;
1777 		} else {
1778 			if ((pd[i] & 0x1f) == ILLEGAL_VL)
1779 				pd[i] = 0xf;
1780 		}
1781 	}
1782 }
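
/*
 * Illustrative sketch, not part of the driver: the effect of
 * filter_sc2vlt() above on a single table entry.  On "set", any SC other
 * than SC15 that maps to VL15 (0xf) is rewritten to ILLEGAL_VL; on "get"
 * the rewrite is undone so the FM sees VL15 again.
 */
static inline u8 example_filter_one_entry(u8 entry, bool set)
{
	if (set)
		return ((entry & 0x1f) == 0xf) ? ILLEGAL_VL : entry;
	return ((entry & 0x1f) == ILLEGAL_VL) ? 0xf : entry;
}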
1783 
1784 static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1785 {
1786 	u64 *val = data;
1787 
1788 	filter_sc2vlt(data, true);
1789 
1790 	write_csr(dd, SEND_SC2VLT0, *val++);
1791 	write_csr(dd, SEND_SC2VLT1, *val++);
1792 	write_csr(dd, SEND_SC2VLT2, *val++);
1793 	write_csr(dd, SEND_SC2VLT3, *val++);
1794 	write_seqlock_irq(&dd->sc2vl_lock);
1795 	memcpy(dd->sc2vl, data, sizeof(dd->sc2vl));
1796 	write_sequnlock_irq(&dd->sc2vl_lock);
1797 	return 0;
1798 }
1799 
1800 static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1801 {
1802 	u64 *val = (u64 *)data;
1803 
1804 	*val++ = read_csr(dd, SEND_SC2VLT0);
1805 	*val++ = read_csr(dd, SEND_SC2VLT1);
1806 	*val++ = read_csr(dd, SEND_SC2VLT2);
1807 	*val++ = read_csr(dd, SEND_SC2VLT3);
1808 
1809 	filter_sc2vlt((u64 *)data, false);
1810 	return 0;
1811 }
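
/*
 * Illustrative sketch, not part of the driver: a lock-free reader of the
 * dd->sc2vl[] shadow copy that set_sc2vlt_tables() refreshes under
 * dd->sc2vl_lock.  The seqlock read side simply retries if a writer raced
 * with the copy.  "out" is assumed to have room for sizeof(dd->sc2vl)
 * bytes.
 */
static inline void example_read_sc2vl(struct hfi1_devdata *dd, u64 *out)
{
	unsigned int seq;

	do {
		seq = read_seqbegin(&dd->sc2vl_lock);
		memcpy(out, dd->sc2vl, sizeof(dd->sc2vl));
	} while (read_seqretry(&dd->sc2vl_lock, seq));
}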
1812 
1813 static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1814 				   struct ib_device *ibdev, u8 port,
1815 				   u32 *resp_len, u32 max_len)
1816 {
1817 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1818 	u8 *p = data;
1819 	size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
1820 	unsigned i;
1821 
1822 	if (am || smp_length_check(size, max_len)) {
1823 		smp->status |= IB_SMP_INVALID_FIELD;
1824 		return reply((struct ib_mad_hdr *)smp);
1825 	}
1826 
1827 	for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++)
1828 		*p++ = ibp->sl_to_sc[i];
1829 
1830 	if (resp_len)
1831 		*resp_len += size;
1832 
1833 	return reply((struct ib_mad_hdr *)smp);
1834 }
1835 
1836 static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1837 				   struct ib_device *ibdev, u8 port,
1838 				   u32 *resp_len, u32 max_len)
1839 {
1840 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1841 	u8 *p = data;
1842 	size_t size = ARRAY_SIZE(ibp->sl_to_sc);
1843 	int i;
1844 	u8 sc;
1845 
1846 	if (am || smp_length_check(size, max_len)) {
1847 		smp->status |= IB_SMP_INVALID_FIELD;
1848 		return reply((struct ib_mad_hdr *)smp);
1849 	}
1850 
1851 	for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++) {
1852 		sc = *p++;
1853 		if (ibp->sl_to_sc[i] != sc) {
1854 			ibp->sl_to_sc[i] = sc;
1855 
1856 			/* Put all stale qps into error state */
1857 			hfi1_error_port_qps(ibp, i);
1858 		}
1859 	}
1860 
1861 	return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len,
1862 				       max_len);
1863 }
1864 
1865 static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1866 				   struct ib_device *ibdev, u8 port,
1867 				   u32 *resp_len, u32 max_len)
1868 {
1869 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1870 	u8 *p = data;
1871 	size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
1872 	unsigned i;
1873 
1874 	if (am || smp_length_check(size, max_len)) {
1875 		smp->status |= IB_SMP_INVALID_FIELD;
1876 		return reply((struct ib_mad_hdr *)smp);
1877 	}
1878 
1879 	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1880 		*p++ = ibp->sc_to_sl[i];
1881 
1882 	if (resp_len)
1883 		*resp_len += size;
1884 
1885 	return reply((struct ib_mad_hdr *)smp);
1886 }
1887 
1888 static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1889 				   struct ib_device *ibdev, u8 port,
1890 				   u32 *resp_len, u32 max_len)
1891 {
1892 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1893 	size_t size = ARRAY_SIZE(ibp->sc_to_sl);
1894 	u8 *p = data;
1895 	int i;
1896 
1897 	if (am || smp_length_check(size, max_len)) {
1898 		smp->status |= IB_SMP_INVALID_FIELD;
1899 		return reply((struct ib_mad_hdr *)smp);
1900 	}
1901 
1902 	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1903 		ibp->sc_to_sl[i] = *p++;
1904 
1905 	return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len,
1906 				       max_len);
1907 }
1908 
1909 static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1910 				    struct ib_device *ibdev, u8 port,
1911 				    u32 *resp_len, u32 max_len)
1912 {
1913 	u32 n_blocks = OPA_AM_NBLK(am);
1914 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1915 	void *vp = (void *)data;
1916 	size_t size = 4 * sizeof(u64);
1917 
1918 	if (n_blocks != 1 || smp_length_check(size, max_len)) {
1919 		smp->status |= IB_SMP_INVALID_FIELD;
1920 		return reply((struct ib_mad_hdr *)smp);
1921 	}
1922 
1923 	get_sc2vlt_tables(dd, vp);
1924 
1925 	if (resp_len)
1926 		*resp_len += size;
1927 
1928 	return reply((struct ib_mad_hdr *)smp);
1929 }
1930 
1931 static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1932 				    struct ib_device *ibdev, u8 port,
1933 				    u32 *resp_len, u32 max_len)
1934 {
1935 	u32 n_blocks = OPA_AM_NBLK(am);
1936 	int async_update = OPA_AM_ASYNC(am);
1937 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1938 	void *vp = (void *)data;
1939 	struct hfi1_pportdata *ppd;
1940 	int lstate;
1941 	/*
1942 	 * set_sc2vlt_tables writes the information contained in *data
1943 	 * to four 64-bit registers SendSC2VLt[0-3]. We need to make
1944 	 * sure max_len is at least as large as the total size of the
1945 	 * four SendSC2VLt[0-3] registers.
1946 	 */
1947 	size_t size = 4 * sizeof(u64);
1948 
1949 	if (n_blocks != 1 || async_update || smp_length_check(size, max_len)) {
1950 		smp->status |= IB_SMP_INVALID_FIELD;
1951 		return reply((struct ib_mad_hdr *)smp);
1952 	}
1953 
1954 	/* IB numbers ports from 1, hw from 0 */
1955 	ppd = dd->pport + (port - 1);
1956 	lstate = driver_lstate(ppd);
1957 	/*
1958 	 * it's known that async_update is 0 by this point, but include
1959 	 * the explicit check for clarity
1960 	 */
1961 	if (!async_update &&
1962 	    (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
1963 		smp->status |= IB_SMP_INVALID_FIELD;
1964 		return reply((struct ib_mad_hdr *)smp);
1965 	}
1966 
1967 	set_sc2vlt_tables(dd, vp);
1968 
1969 	return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len,
1970 					max_len);
1971 }
1972 
1973 static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1974 				     struct ib_device *ibdev, u8 port,
1975 				     u32 *resp_len, u32 max_len)
1976 {
1977 	u32 n_blocks = OPA_AM_NPORT(am);
1978 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1979 	struct hfi1_pportdata *ppd;
1980 	void *vp = (void *)data;
1981 	int size = sizeof(struct sc2vlnt);
1982 
1983 	if (n_blocks != 1 || smp_length_check(size, max_len)) {
1984 		smp->status |= IB_SMP_INVALID_FIELD;
1985 		return reply((struct ib_mad_hdr *)smp);
1986 	}
1987 
1988 	ppd = dd->pport + (port - 1);
1989 
1990 	fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
1991 
1992 	if (resp_len)
1993 		*resp_len += size;
1994 
1995 	return reply((struct ib_mad_hdr *)smp);
1996 }
1997 
1998 static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1999 				     struct ib_device *ibdev, u8 port,
2000 				     u32 *resp_len, u32 max_len)
2001 {
2002 	u32 n_blocks = OPA_AM_NPORT(am);
2003 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2004 	struct hfi1_pportdata *ppd;
2005 	void *vp = (void *)data;
2006 	int lstate;
2007 	int size = sizeof(struct sc2vlnt);
2008 
2009 	if (n_blocks != 1 || smp_length_check(size, max_len)) {
2010 		smp->status |= IB_SMP_INVALID_FIELD;
2011 		return reply((struct ib_mad_hdr *)smp);
2012 	}
2013 
2014 	/* IB numbers ports from 1, hw from 0 */
2015 	ppd = dd->pport + (port - 1);
2016 	lstate = driver_lstate(ppd);
2017 	if (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE) {
2018 		smp->status |= IB_SMP_INVALID_FIELD;
2019 		return reply((struct ib_mad_hdr *)smp);
2020 	}
2021 
2022 	ppd = dd->pport + (port - 1);
2023 
2024 	fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
2025 
2026 	return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
2027 					 resp_len, max_len);
2028 }
2029 
2030 static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
2031 			      struct ib_device *ibdev, u8 port,
2032 			      u32 *resp_len, u32 max_len)
2033 {
2034 	u32 nports = OPA_AM_NPORT(am);
2035 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
2036 	u32 lstate;
2037 	struct hfi1_ibport *ibp;
2038 	struct hfi1_pportdata *ppd;
2039 	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
2040 
2041 	if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
2042 		smp->status |= IB_SMP_INVALID_FIELD;
2043 		return reply((struct ib_mad_hdr *)smp);
2044 	}
2045 
2046 	ibp = to_iport(ibdev, port);
2047 	ppd = ppd_from_ibp(ibp);
2048 
2049 	lstate = driver_lstate(ppd);
2050 
2051 	if (start_of_sm_config && (lstate == IB_PORT_INIT))
2052 		ppd->is_sm_config_started = 1;
2053 
2054 	psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
2055 	psi->port_states.ledenable_offlinereason |=
2056 		ppd->is_sm_config_started << 5;
2057 	psi->port_states.ledenable_offlinereason |=
2058 		ppd->offline_disabled_reason;
2059 
2060 	psi->port_states.portphysstate_portstate =
2061 		(driver_pstate(ppd) << 4) | (lstate & 0xf);
2062 	psi->link_width_downgrade_tx_active =
2063 		cpu_to_be16(ppd->link_width_downgrade_tx_active);
2064 	psi->link_width_downgrade_rx_active =
2065 		cpu_to_be16(ppd->link_width_downgrade_rx_active);
2066 	if (resp_len)
2067 		*resp_len += sizeof(struct opa_port_state_info);
2068 
2069 	return reply((struct ib_mad_hdr *)smp);
2070 }
2071 
2072 static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
2073 			      struct ib_device *ibdev, u8 port,
2074 			      u32 *resp_len, u32 max_len)
2075 {
2076 	u32 nports = OPA_AM_NPORT(am);
2077 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
2078 	u32 ls_old;
2079 	u8 ls_new, ps_new;
2080 	struct hfi1_ibport *ibp;
2081 	struct hfi1_pportdata *ppd;
2082 	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
2083 	int ret, invalid = 0;
2084 
2085 	if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
2086 		smp->status |= IB_SMP_INVALID_FIELD;
2087 		return reply((struct ib_mad_hdr *)smp);
2088 	}
2089 
2090 	ibp = to_iport(ibdev, port);
2091 	ppd = ppd_from_ibp(ibp);
2092 
2093 	ls_old = driver_lstate(ppd);
2094 
2095 	ls_new = port_states_to_logical_state(&psi->port_states);
2096 	ps_new = port_states_to_phys_state(&psi->port_states);
2097 
2098 	if (ls_old == IB_PORT_INIT) {
2099 		if (start_of_sm_config) {
2100 			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
2101 				ppd->is_sm_config_started = 1;
2102 		} else if (ls_new == IB_PORT_ARMED) {
2103 			if (ppd->is_sm_config_started == 0)
2104 				invalid = 1;
2105 		}
2106 	}
2107 
2108 	ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
2109 	if (ret)
2110 		return ret;
2111 
2112 	if (invalid)
2113 		smp->status |= IB_SMP_INVALID_FIELD;
2114 
2115 	return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len,
2116 				  max_len);
2117 }
2118 
2119 static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
2120 				     struct ib_device *ibdev, u8 port,
2121 				     u32 *resp_len, u32 max_len)
2122 {
2123 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2124 	u32 addr = OPA_AM_CI_ADDR(am);
2125 	u32 len = OPA_AM_CI_LEN(am) + 1;
2126 	int ret;
2127 
2128 	if (dd->pport->port_type != PORT_TYPE_QSFP ||
2129 	    smp_length_check(len, max_len)) {
2130 		smp->status |= IB_SMP_INVALID_FIELD;
2131 		return reply((struct ib_mad_hdr *)smp);
2132 	}
2133 
2134 #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
2135 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
2136 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
2137 
2138 	/*
2139 	 * check that addr is within spec, and
2140 	 * addr and (addr + len - 1) are on the same "page"
2141 	 */
2142 	if (addr >= 4096 ||
2143 	    (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
2144 		smp->status |= IB_SMP_INVALID_FIELD;
2145 		return reply((struct ib_mad_hdr *)smp);
2146 	}
2147 
2148 	ret = get_cable_info(dd, port, addr, len, data);
2149 
2150 	if (ret == -ENODEV) {
2151 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
2152 		return reply((struct ib_mad_hdr *)smp);
2153 	}
2154 
2155 	/* The address range for the CableInfo SMA query is wider than the
2156 	 * memory available on the QSFP cable. We want to return a valid
2157 	 * response, albeit zeroed out, for address ranges beyond available
2158 	 * memory but that are within the CableInfo query spec
2159 	 */
2160 	if (ret < 0 && ret != -ERANGE) {
2161 		smp->status |= IB_SMP_INVALID_FIELD;
2162 		return reply((struct ib_mad_hdr *)smp);
2163 	}
2164 
2165 	if (resp_len)
2166 		*resp_len += len;
2167 
2168 	return reply((struct ib_mad_hdr *)smp);
2169 }
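
/*
 * Illustrative sketch, not part of the driver: the CableInfo address
 * window check applied above.  A request is accepted only if it starts
 * below 4096 and does not cross a 128-byte page boundary.
 */
static inline bool example_ci_request_ok(u32 addr, u32 len)
{
	return addr < 4096 &&
	       __CI_PAGE_NUM(addr) == __CI_PAGE_NUM(addr + len - 1);
}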
2170 
2171 static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
2172 			      struct ib_device *ibdev, u8 port, u32 *resp_len,
2173 			      u32 max_len)
2174 {
2175 	u32 num_ports = OPA_AM_NPORT(am);
2176 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2177 	struct hfi1_pportdata *ppd;
2178 	struct buffer_control *p = (struct buffer_control *)data;
2179 	int size = sizeof(struct buffer_control);
2180 
2181 	if (num_ports != 1 || smp_length_check(size, max_len)) {
2182 		smp->status |= IB_SMP_INVALID_FIELD;
2183 		return reply((struct ib_mad_hdr *)smp);
2184 	}
2185 
2186 	ppd = dd->pport + (port - 1);
2187 	fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
2188 	trace_bct_get(dd, p);
2189 	if (resp_len)
2190 		*resp_len += size;
2191 
2192 	return reply((struct ib_mad_hdr *)smp);
2193 }
2194 
2195 static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
2196 			      struct ib_device *ibdev, u8 port, u32 *resp_len,
2197 			      u32 max_len)
2198 {
2199 	u32 num_ports = OPA_AM_NPORT(am);
2200 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2201 	struct hfi1_pportdata *ppd;
2202 	struct buffer_control *p = (struct buffer_control *)data;
2203 
2204 	if (num_ports != 1 || smp_length_check(sizeof(*p), max_len)) {
2205 		smp->status |= IB_SMP_INVALID_FIELD;
2206 		return reply((struct ib_mad_hdr *)smp);
2207 	}
2208 	ppd = dd->pport + (port - 1);
2209 	trace_bct_set(dd, p);
2210 	if (fm_set_table(ppd, FM_TBL_BUFFER_CONTROL, p) < 0) {
2211 		smp->status |= IB_SMP_INVALID_FIELD;
2212 		return reply((struct ib_mad_hdr *)smp);
2213 	}
2214 
2215 	return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len,
2216 				  max_len);
2217 }
2218 
2219 static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
2220 				 struct ib_device *ibdev, u8 port,
2221 				 u32 *resp_len, u32 max_len)
2222 {
2223 	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
2224 	u32 num_ports = OPA_AM_NPORT(am);
2225 	u8 section = (am & 0x00ff0000) >> 16;
2226 	u8 *p = data;
2227 	int size = 256;
2228 
2229 	if (num_ports != 1 || smp_length_check(size, max_len)) {
2230 		smp->status |= IB_SMP_INVALID_FIELD;
2231 		return reply((struct ib_mad_hdr *)smp);
2232 	}
2233 
2234 	switch (section) {
2235 	case OPA_VLARB_LOW_ELEMENTS:
2236 		fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
2237 		break;
2238 	case OPA_VLARB_HIGH_ELEMENTS:
2239 		fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
2240 		break;
2241 	case OPA_VLARB_PREEMPT_ELEMENTS:
2242 		fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
2243 		break;
2244 	case OPA_VLARB_PREEMPT_MATRIX:
2245 		fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
2246 		break;
2247 	default:
2248 		pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
2249 			be32_to_cpu(smp->attr_mod));
2250 		smp->status |= IB_SMP_INVALID_FIELD;
2251 		size = 0;
2252 		break;
2253 	}
2254 
2255 	if (size > 0 && resp_len)
2256 		*resp_len += size;
2257 
2258 	return reply((struct ib_mad_hdr *)smp);
2259 }
2260 
2261 static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
2262 				 struct ib_device *ibdev, u8 port,
2263 				 u32 *resp_len, u32 max_len)
2264 {
2265 	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
2266 	u32 num_ports = OPA_AM_NPORT(am);
2267 	u8 section = (am & 0x00ff0000) >> 16;
2268 	u8 *p = data;
2269 	int size = 256;
2270 
2271 	if (num_ports != 1 || smp_length_check(size, max_len)) {
2272 		smp->status |= IB_SMP_INVALID_FIELD;
2273 		return reply((struct ib_mad_hdr *)smp);
2274 	}
2275 
2276 	switch (section) {
2277 	case OPA_VLARB_LOW_ELEMENTS:
2278 		(void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
2279 		break;
2280 	case OPA_VLARB_HIGH_ELEMENTS:
2281 		(void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
2282 		break;
2283 	/*
2284 	 * Neither OPA_VLARB_PREEMPT_ELEMENTS nor OPA_VLARB_PREEMPT_MATRIX
2285 	 * can be changed from the default values
2286 	 */
2287 	case OPA_VLARB_PREEMPT_ELEMENTS:
2288 		/* FALLTHROUGH */
2289 	case OPA_VLARB_PREEMPT_MATRIX:
2290 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
2291 		break;
2292 	default:
2293 		pr_warn("OPA SubnSet(VL Arb) AM Invalid : 0x%x\n",
2294 			be32_to_cpu(smp->attr_mod));
2295 		smp->status |= IB_SMP_INVALID_FIELD;
2296 		break;
2297 	}
2298 
2299 	return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len,
2300 				     max_len);
2301 }
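
/*
 * Illustrative sketch, not part of the driver: how the two VL Arbitration
 * handlers above decode the attribute modifier.  Bits 23:16 select the
 * table section (e.g. OPA_VLARB_LOW_ELEMENTS), while OPA_AM_NPORT()
 * extracts the port count, which must be 1.
 */
static inline u8 example_vlarb_section(u32 am)
{
	return (am & 0x00ff0000) >> 16;
}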
2302 
2303 struct opa_pma_mad {
2304 	struct ib_mad_hdr mad_hdr;
2305 	u8 data[2024];
2306 } __packed;
2307 
2308 struct opa_port_status_req {
2309 	__u8 port_num;
2310 	__u8 reserved[3];
2311 	__be32 vl_select_mask;
2312 };
2313 
2314 #define VL_MASK_ALL		0x000080ff
2315 
2316 struct opa_port_status_rsp {
2317 	__u8 port_num;
2318 	__u8 reserved[3];
2319 	__be32  vl_select_mask;
2320 
2321 	/* Data counters */
2322 	__be64 port_xmit_data;
2323 	__be64 port_rcv_data;
2324 	__be64 port_xmit_pkts;
2325 	__be64 port_rcv_pkts;
2326 	__be64 port_multicast_xmit_pkts;
2327 	__be64 port_multicast_rcv_pkts;
2328 	__be64 port_xmit_wait;
2329 	__be64 sw_port_congestion;
2330 	__be64 port_rcv_fecn;
2331 	__be64 port_rcv_becn;
2332 	__be64 port_xmit_time_cong;
2333 	__be64 port_xmit_wasted_bw;
2334 	__be64 port_xmit_wait_data;
2335 	__be64 port_rcv_bubble;
2336 	__be64 port_mark_fecn;
2337 	/* Error counters */
2338 	__be64 port_rcv_constraint_errors;
2339 	__be64 port_rcv_switch_relay_errors;
2340 	__be64 port_xmit_discards;
2341 	__be64 port_xmit_constraint_errors;
2342 	__be64 port_rcv_remote_physical_errors;
2343 	__be64 local_link_integrity_errors;
2344 	__be64 port_rcv_errors;
2345 	__be64 excessive_buffer_overruns;
2346 	__be64 fm_config_errors;
2347 	__be32 link_error_recovery;
2348 	__be32 link_downed;
2349 	u8 uncorrectable_errors;
2350 
2351 	u8 link_quality_indicator; /* 5res, 3bit */
2352 	u8 res2[6];
2353 	struct _vls_pctrs {
2354 		/* per-VL Data counters */
2355 		__be64 port_vl_xmit_data;
2356 		__be64 port_vl_rcv_data;
2357 		__be64 port_vl_xmit_pkts;
2358 		__be64 port_vl_rcv_pkts;
2359 		__be64 port_vl_xmit_wait;
2360 		__be64 sw_port_vl_congestion;
2361 		__be64 port_vl_rcv_fecn;
2362 		__be64 port_vl_rcv_becn;
2363 		__be64 port_xmit_time_cong;
2364 		__be64 port_vl_xmit_wasted_bw;
2365 		__be64 port_vl_xmit_wait_data;
2366 		__be64 port_vl_rcv_bubble;
2367 		__be64 port_vl_mark_fecn;
2368 		__be64 port_vl_xmit_discards;
2369 	} vls[0]; /* real array size defined by # bits set in vl_select_mask */
2370 };
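
/*
 * Illustrative sketch, not part of the driver: sizing a PortStatus
 * response.  The vls[] trailer carries one _vls_pctrs entry per bit set
 * in vl_select_mask, which is exactly how pma_get_opa_portstatus() below
 * computes response_data_size.
 */
static inline size_t example_portstatus_size(u32 vl_select_mask)
{
	return sizeof(struct opa_port_status_rsp) +
	       hweight32(vl_select_mask) * sizeof(struct _vls_pctrs);
}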
2371 
2372 enum counter_selects {
2373 	CS_PORT_XMIT_DATA			= (1 << 31),
2374 	CS_PORT_RCV_DATA			= (1 << 30),
2375 	CS_PORT_XMIT_PKTS			= (1 << 29),
2376 	CS_PORT_RCV_PKTS			= (1 << 28),
2377 	CS_PORT_MCAST_XMIT_PKTS			= (1 << 27),
2378 	CS_PORT_MCAST_RCV_PKTS			= (1 << 26),
2379 	CS_PORT_XMIT_WAIT			= (1 << 25),
2380 	CS_SW_PORT_CONGESTION			= (1 << 24),
2381 	CS_PORT_RCV_FECN			= (1 << 23),
2382 	CS_PORT_RCV_BECN			= (1 << 22),
2383 	CS_PORT_XMIT_TIME_CONG			= (1 << 21),
2384 	CS_PORT_XMIT_WASTED_BW			= (1 << 20),
2385 	CS_PORT_XMIT_WAIT_DATA			= (1 << 19),
2386 	CS_PORT_RCV_BUBBLE			= (1 << 18),
2387 	CS_PORT_MARK_FECN			= (1 << 17),
2388 	CS_PORT_RCV_CONSTRAINT_ERRORS		= (1 << 16),
2389 	CS_PORT_RCV_SWITCH_RELAY_ERRORS		= (1 << 15),
2390 	CS_PORT_XMIT_DISCARDS			= (1 << 14),
2391 	CS_PORT_XMIT_CONSTRAINT_ERRORS		= (1 << 13),
2392 	CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS	= (1 << 12),
2393 	CS_LOCAL_LINK_INTEGRITY_ERRORS		= (1 << 11),
2394 	CS_PORT_RCV_ERRORS			= (1 << 10),
2395 	CS_EXCESSIVE_BUFFER_OVERRUNS		= (1 << 9),
2396 	CS_FM_CONFIG_ERRORS			= (1 << 8),
2397 	CS_LINK_ERROR_RECOVERY			= (1 << 7),
2398 	CS_LINK_DOWNED				= (1 << 6),
2399 	CS_UNCORRECTABLE_ERRORS			= (1 << 5),
2400 };
2401 
2402 struct opa_clear_port_status {
2403 	__be64 port_select_mask[4];
2404 	__be32 counter_select_mask;
2405 };
2406 
2407 struct opa_aggregate {
2408 	__be16 attr_id;
2409 	__be16 err_reqlength;	/* 1 bit, 8 res, 7 bit */
2410 	__be32 attr_mod;
2411 	u8 data[0];
2412 };
2413 
2414 #define MSK_LLI 0x000000f0
2415 #define MSK_LLI_SFT 4
2416 #define MSK_LER 0x0000000f
2417 #define MSK_LER_SFT 0
2418 #define ADD_LLI 8
2419 #define ADD_LER 2
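
/*
 * Illustrative sketch, not part of the driver: decoding the LLI resolution
 * field from a host-order DataCounters "resolution" word, mirroring
 * pma_get_opa_datacounters() below.  A zero field means no scaling;
 * otherwise the counter is right-shifted by (field + ADD_LLI) bits.
 */
static inline u8 example_lli_shift(u32 resolution)
{
	u8 res = (resolution & MSK_LLI) >> MSK_LLI_SFT;

	return res ? res + ADD_LLI : 0;
}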
2420 
2421 /* Request contains first three fields, response contains those plus the rest */
2422 struct opa_port_data_counters_msg {
2423 	__be64 port_select_mask[4];
2424 	__be32 vl_select_mask;
2425 	__be32 resolution;
2426 
2427 	/* Response fields follow */
2428 	struct _port_dctrs {
2429 		u8 port_number;
2430 		u8 reserved2[3];
2431 		__be32 link_quality_indicator; /* 29res, 3bit */
2432 
2433 		/* Data counters */
2434 		__be64 port_xmit_data;
2435 		__be64 port_rcv_data;
2436 		__be64 port_xmit_pkts;
2437 		__be64 port_rcv_pkts;
2438 		__be64 port_multicast_xmit_pkts;
2439 		__be64 port_multicast_rcv_pkts;
2440 		__be64 port_xmit_wait;
2441 		__be64 sw_port_congestion;
2442 		__be64 port_rcv_fecn;
2443 		__be64 port_rcv_becn;
2444 		__be64 port_xmit_time_cong;
2445 		__be64 port_xmit_wasted_bw;
2446 		__be64 port_xmit_wait_data;
2447 		__be64 port_rcv_bubble;
2448 		__be64 port_mark_fecn;
2449 
2450 		__be64 port_error_counter_summary;
2451 		/* Sum of error counts/port */
2452 
2453 		struct _vls_dctrs {
2454 			/* per-VL Data counters */
2455 			__be64 port_vl_xmit_data;
2456 			__be64 port_vl_rcv_data;
2457 			__be64 port_vl_xmit_pkts;
2458 			__be64 port_vl_rcv_pkts;
2459 			__be64 port_vl_xmit_wait;
2460 			__be64 sw_port_vl_congestion;
2461 			__be64 port_vl_rcv_fecn;
2462 			__be64 port_vl_rcv_becn;
2463 			__be64 port_xmit_time_cong;
2464 			__be64 port_vl_xmit_wasted_bw;
2465 			__be64 port_vl_xmit_wait_data;
2466 			__be64 port_vl_rcv_bubble;
2467 			__be64 port_vl_mark_fecn;
2468 		} vls[0];
2469 		/* array size defined by #bits set in vl_select_mask*/
2470 	} port[1]; /* array size defined by  #ports in attribute modifier */
2471 };
2472 
2473 struct opa_port_error_counters64_msg {
2474 	/*
2475 	 * Request contains the first two fields; the response contains
2476 	 * the entire structure.
2477 	 */
2478 	__be64 port_select_mask[4];
2479 	__be32 vl_select_mask;
2480 
2481 	/* Response-only fields follow */
2482 	__be32 reserved1;
2483 	struct _port_ectrs {
2484 		u8 port_number;
2485 		u8 reserved2[7];
2486 		__be64 port_rcv_constraint_errors;
2487 		__be64 port_rcv_switch_relay_errors;
2488 		__be64 port_xmit_discards;
2489 		__be64 port_xmit_constraint_errors;
2490 		__be64 port_rcv_remote_physical_errors;
2491 		__be64 local_link_integrity_errors;
2492 		__be64 port_rcv_errors;
2493 		__be64 excessive_buffer_overruns;
2494 		__be64 fm_config_errors;
2495 		__be32 link_error_recovery;
2496 		__be32 link_downed;
2497 		u8 uncorrectable_errors;
2498 		u8 reserved3[7];
2499 		struct _vls_ectrs {
2500 			__be64 port_vl_xmit_discards;
2501 		} vls[0];
2502 		/* array size defined by #bits set in vl_select_mask */
2503 	} port[1]; /* array size defined by #ports in attribute modifier */
2504 };
2505 
2506 struct opa_port_error_info_msg {
2507 	__be64 port_select_mask[4];
2508 	__be32 error_info_select_mask;
2509 	__be32 reserved1;
2510 	struct _port_ei {
2511 		u8 port_number;
2512 		u8 reserved2[7];
2513 
2514 		/* PortRcvErrorInfo */
2515 		struct {
2516 			u8 status_and_code;
2517 			union {
2518 				u8 raw[17];
2519 				struct {
2520 					/* EI1to12 format */
2521 					u8 packet_flit1[8];
2522 					u8 packet_flit2[8];
2523 					u8 remaining_flit_bits12;
2524 				} ei1to12;
2525 				struct {
2526 					u8 packet_bytes[8];
2527 					u8 remaining_flit_bits;
2528 				} ei13;
2529 			} ei;
2530 			u8 reserved3[6];
2531 		} __packed port_rcv_ei;
2532 
2533 		/* ExcessiveBufferOverrunInfo */
2534 		struct {
2535 			u8 status_and_sc;
2536 			u8 reserved4[7];
2537 		} __packed excessive_buffer_overrun_ei;
2538 
2539 		/* PortXmitConstraintErrorInfo */
2540 		struct {
2541 			u8 status;
2542 			u8 reserved5;
2543 			__be16 pkey;
2544 			__be32 slid;
2545 		} __packed port_xmit_constraint_ei;
2546 
2547 		/* PortRcvConstraintErrorInfo */
2548 		struct {
2549 			u8 status;
2550 			u8 reserved6;
2551 			__be16 pkey;
2552 			__be32 slid;
2553 		} __packed port_rcv_constraint_ei;
2554 
2555 		/* PortRcvSwitchRelayErrorInfo */
2556 		struct {
2557 			u8 status_and_code;
2558 			u8 reserved7[3];
2559 			__u32 error_info;
2560 		} __packed port_rcv_switch_relay_ei;
2561 
2562 		/* UncorrectableErrorInfo */
2563 		struct {
2564 			u8 status_and_code;
2565 			u8 reserved8;
2566 		} __packed uncorrectable_ei;
2567 
2568 		/* FMConfigErrorInfo */
2569 		struct {
2570 			u8 status_and_code;
2571 			u8 error_info;
2572 		} __packed fm_config_ei;
2573 		__u32 reserved9;
2574 	} port[1]; /* actual array size defined by #ports in attr modifier */
2575 };
2576 
2577 /* opa_port_error_info_msg error_info_select_mask bit definitions */
2578 enum error_info_selects {
2579 	ES_PORT_RCV_ERROR_INFO			= (1 << 31),
2580 	ES_EXCESSIVE_BUFFER_OVERRUN_INFO	= (1 << 30),
2581 	ES_PORT_XMIT_CONSTRAINT_ERROR_INFO	= (1 << 29),
2582 	ES_PORT_RCV_CONSTRAINT_ERROR_INFO	= (1 << 28),
2583 	ES_PORT_RCV_SWITCH_RELAY_ERROR_INFO	= (1 << 27),
2584 	ES_UNCORRECTABLE_ERROR_INFO		= (1 << 26),
2585 	ES_FM_CONFIG_ERROR_INFO			= (1 << 25)
2586 };
2587 
2588 static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
2589 				     struct ib_device *ibdev, u32 *resp_len)
2590 {
2591 	struct opa_class_port_info *p =
2592 		(struct opa_class_port_info *)pmp->data;
2593 
2594 	memset(pmp->data, 0, sizeof(pmp->data));
2595 
2596 	if (pmp->mad_hdr.attr_mod != 0)
2597 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2598 
2599 	p->base_version = OPA_MGMT_BASE_VERSION;
2600 	p->class_version = OPA_SM_CLASS_VERSION;
2601 	/*
2602 	 * Expected response time is 4.096 usec * 2^18 == 1.073741824 sec.
2603 	 */
2604 	p->cap_mask2_resp_time = cpu_to_be32(18);
2605 
2606 	if (resp_len)
2607 		*resp_len += sizeof(*p);
2608 
2609 	return reply((struct ib_mad_hdr *)pmp);
2610 }
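
/*
 * Illustrative sketch, not part of the driver: the RespTimeValue encoding
 * behind cap_mask2_resp_time above, assuming (as in IB ClassPortInfo) that
 * the value lives in the low 5 bits.  The advertised response time is
 * 4.096 usec * 2^N, so the N == 18 programmed above works out to roughly
 * 1.07 seconds.
 */
static inline u64 example_resp_time_ns(u32 cap_mask2_resp_time)
{
	return 4096ULL << (cap_mask2_resp_time & 0x1f);
}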
2611 
2612 static void a0_portstatus(struct hfi1_pportdata *ppd,
2613 			  struct opa_port_status_rsp *rsp, u32 vl_select_mask)
2614 {
2615 	if (!is_bx(ppd->dd)) {
2616 		unsigned long vl;
2617 		u64 sum_vl_xmit_wait = 0;
2618 		u32 vl_all_mask = VL_MASK_ALL;
2619 
2620 		for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
2621 				 8 * sizeof(vl_all_mask)) {
2622 			u64 tmp = sum_vl_xmit_wait +
2623 				  read_port_cntr(ppd, C_TX_WAIT_VL,
2624 						 idx_from_vl(vl));
2625 			if (tmp < sum_vl_xmit_wait) {
2626 				/* we wrapped */
2627 				sum_vl_xmit_wait = (u64)~0;
2628 				break;
2629 			}
2630 			sum_vl_xmit_wait = tmp;
2631 		}
2632 		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2633 			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2634 	}
2635 }
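
/*
 * Illustrative sketch, not part of the driver: the saturating accumulation
 * used in a0_portstatus() above (and a0_datacounters() below).  If the
 * per-VL sum would wrap, it pegs at ~0 instead of wrapping around.
 */
static inline u64 example_saturating_add(u64 sum, u64 addend)
{
	u64 tmp = sum + addend;

	return tmp < sum ? (u64)~0 : tmp;
}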
2636 
2637 static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
2638 				  struct ib_device *ibdev,
2639 				  u8 port, u32 *resp_len)
2640 {
2641 	struct opa_port_status_req *req =
2642 		(struct opa_port_status_req *)pmp->data;
2643 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2644 	struct opa_port_status_rsp *rsp;
2645 	u32 vl_select_mask = be32_to_cpu(req->vl_select_mask);
2646 	unsigned long vl;
2647 	size_t response_data_size;
2648 	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2649 	u8 port_num = req->port_num;
2650 	u8 num_vls = hweight32(vl_select_mask);
2651 	struct _vls_pctrs *vlinfo;
2652 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2653 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2654 	int vfi;
2655 	u64 tmp, tmp2;
2656 
2657 	response_data_size = sizeof(struct opa_port_status_rsp) +
2658 				num_vls * sizeof(struct _vls_pctrs);
2659 	if (response_data_size > sizeof(pmp->data)) {
2660 		pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE;
2661 		return reply((struct ib_mad_hdr *)pmp);
2662 	}
2663 
2664 	if (nports != 1 || (port_num && port_num != port) ||
2665 	    num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
2666 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2667 		return reply((struct ib_mad_hdr *)pmp);
2668 	}
2669 
2670 	memset(pmp->data, 0, sizeof(pmp->data));
2671 
2672 	rsp = (struct opa_port_status_rsp *)pmp->data;
2673 	if (port_num)
2674 		rsp->port_num = port_num;
2675 	else
2676 		rsp->port_num = port;
2677 
2678 	rsp->port_rcv_constraint_errors =
2679 		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2680 					   CNTR_INVALID_VL));
2681 
2682 	hfi1_read_link_quality(dd, &rsp->link_quality_indicator);
2683 
2684 	rsp->vl_select_mask = cpu_to_be32(vl_select_mask);
2685 	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2686 					  CNTR_INVALID_VL));
2687 	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2688 					 CNTR_INVALID_VL));
2689 	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2690 					  CNTR_INVALID_VL));
2691 	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2692 					 CNTR_INVALID_VL));
2693 	rsp->port_multicast_xmit_pkts =
2694 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2695 					  CNTR_INVALID_VL));
2696 	rsp->port_multicast_rcv_pkts =
2697 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2698 					  CNTR_INVALID_VL));
2699 	rsp->port_xmit_wait =
2700 		cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2701 	rsp->port_rcv_fecn =
2702 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2703 	rsp->port_rcv_becn =
2704 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2705 	rsp->port_xmit_discards =
2706 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2707 					   CNTR_INVALID_VL));
2708 	rsp->port_xmit_constraint_errors =
2709 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2710 					   CNTR_INVALID_VL));
2711 	rsp->port_rcv_remote_physical_errors =
2712 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2713 					  CNTR_INVALID_VL));
2714 	rsp->local_link_integrity_errors =
2715 		cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
2716 					  CNTR_INVALID_VL));
2717 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2718 	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2719 				   CNTR_INVALID_VL);
2720 	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
2721 		/* overflow/wrapped */
2722 		rsp->link_error_recovery = cpu_to_be32(~0);
2723 	} else {
2724 		rsp->link_error_recovery = cpu_to_be32(tmp2);
2725 	}
2726 	rsp->port_rcv_errors =
2727 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2728 	rsp->excessive_buffer_overruns =
2729 		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2730 	rsp->fm_config_errors =
2731 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2732 					  CNTR_INVALID_VL));
2733 	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2734 						      CNTR_INVALID_VL));
2735 
2736 	/* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
2737 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2738 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2739 
2740 	vlinfo = &rsp->vls[0];
2741 	vfi = 0;
2742 	/* The vl_select_mask has been checked above, and we know
2743 	 * that it contains only entries which represent valid VLs.
2744 	 * So in the for_each_set_bit() loop below, we don't need
2745 	 * any additional checks for vl.
2746 	 */
2747 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2748 			 8 * sizeof(vl_select_mask)) {
2749 		memset(vlinfo, 0, sizeof(*vlinfo));
2750 
2751 		tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
2752 		rsp->vls[vfi].port_vl_rcv_data = cpu_to_be64(tmp);
2753 
2754 		rsp->vls[vfi].port_vl_rcv_pkts =
2755 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2756 						  idx_from_vl(vl)));
2757 
2758 		rsp->vls[vfi].port_vl_xmit_data =
2759 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2760 						   idx_from_vl(vl)));
2761 
2762 		rsp->vls[vfi].port_vl_xmit_pkts =
2763 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2764 						   idx_from_vl(vl)));
2765 
2766 		rsp->vls[vfi].port_vl_xmit_wait =
2767 			cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2768 						   idx_from_vl(vl)));
2769 
2770 		rsp->vls[vfi].port_vl_rcv_fecn =
2771 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2772 						  idx_from_vl(vl)));
2773 
2774 		rsp->vls[vfi].port_vl_rcv_becn =
2775 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2776 						  idx_from_vl(vl)));
2777 
2778 		rsp->vls[vfi].port_vl_xmit_discards =
2779 			cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
2780 						   idx_from_vl(vl)));
2781 		vlinfo++;
2782 		vfi++;
2783 	}
2784 
2785 	a0_portstatus(ppd, rsp, vl_select_mask);
2786 
2787 	if (resp_len)
2788 		*resp_len += response_data_size;
2789 
2790 	return reply((struct ib_mad_hdr *)pmp);
2791 }
2792 
2793 static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
2794 				     u8 res_lli, u8 res_ler)
2795 {
2796 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2797 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2798 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2799 	u64 error_counter_summary = 0, tmp;
2800 
2801 	error_counter_summary += read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2802 						CNTR_INVALID_VL);
2803 	/* port_rcv_switch_relay_errors is 0 for HFIs */
2804 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_DSCD,
2805 						CNTR_INVALID_VL);
2806 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2807 						CNTR_INVALID_VL);
2808 	error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2809 					       CNTR_INVALID_VL);
2810 	/* local link integrity must be right-shifted by the lli resolution */
2811 	error_counter_summary += (read_dev_cntr(dd, C_DC_RX_REPLAY,
2812 						CNTR_INVALID_VL) >> res_lli);
2813 	/* link error recovery must be right-shifted by the ler resolution */
2814 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2815 	tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
2816 	error_counter_summary += (tmp >> res_ler);
2817 	error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
2818 					       CNTR_INVALID_VL);
2819 	error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2820 	error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2821 					       CNTR_INVALID_VL);
2822 	/* ppd->link_downed is a 32-bit value */
2823 	error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
2824 						CNTR_INVALID_VL);
2825 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2826 	/* this is an 8-bit quantity */
2827 	error_counter_summary += tmp < 0x100 ? (tmp & 0xff) : 0xff;
2828 
2829 	return error_counter_summary;
2830 }
2831 
2832 static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
2833 			    u32 vl_select_mask)
2834 {
2835 	if (!is_bx(ppd->dd)) {
2836 		unsigned long vl;
2837 		u64 sum_vl_xmit_wait = 0;
2838 		u32 vl_all_mask = VL_MASK_ALL;
2839 
2840 		for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
2841 				 8 * sizeof(vl_all_mask)) {
2842 			u64 tmp = sum_vl_xmit_wait +
2843 				  read_port_cntr(ppd, C_TX_WAIT_VL,
2844 						 idx_from_vl(vl));
2845 			if (tmp < sum_vl_xmit_wait) {
2846 				/* we wrapped */
2847 				sum_vl_xmit_wait = (u64)~0;
2848 				break;
2849 			}
2850 			sum_vl_xmit_wait = tmp;
2851 		}
2852 		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2853 			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2854 	}
2855 }
2856 
2857 static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
2858 				   struct _port_dctrs *rsp)
2859 {
2860 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2861 
2862 	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2863 						CNTR_INVALID_VL));
2864 	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2865 						CNTR_INVALID_VL));
2866 	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2867 						CNTR_INVALID_VL));
2868 	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2869 						CNTR_INVALID_VL));
2870 	rsp->port_multicast_xmit_pkts =
2871 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2872 					  CNTR_INVALID_VL));
2873 	rsp->port_multicast_rcv_pkts =
2874 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2875 					  CNTR_INVALID_VL));
2876 }
2877 
2878 static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
2879 				    struct ib_device *ibdev,
2880 				    u8 port, u32 *resp_len)
2881 {
2882 	struct opa_port_data_counters_msg *req =
2883 		(struct opa_port_data_counters_msg *)pmp->data;
2884 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2885 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2886 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2887 	struct _port_dctrs *rsp;
2888 	struct _vls_dctrs *vlinfo;
2889 	size_t response_data_size;
2890 	u32 num_ports;
2891 	u8 num_pslm;
2892 	u8 lq, num_vls;
2893 	u8 res_lli, res_ler;
2894 	u64 port_mask;
2895 	u8 port_num;
2896 	unsigned long vl;
2897 	u32 vl_select_mask;
2898 	int vfi;
2899 
2900 	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2901 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2902 	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2903 	vl_select_mask = be32_to_cpu(req->vl_select_mask);
2904 	res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT;
2905 	res_lli = res_lli ? res_lli + ADD_LLI : 0;
2906 	res_ler = (u8)(be32_to_cpu(req->resolution) & MSK_LER) >> MSK_LER_SFT;
2907 	res_ler = res_ler ? res_ler + ADD_LER : 0;
2908 
2909 	if (num_ports != 1 || (vl_select_mask & ~VL_MASK_ALL)) {
2910 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2911 		return reply((struct ib_mad_hdr *)pmp);
2912 	}
2913 
2914 	/* Sanity check */
2915 	response_data_size = sizeof(struct opa_port_data_counters_msg) +
2916 				num_vls * sizeof(struct _vls_dctrs);
2917 
2918 	if (response_data_size > sizeof(pmp->data)) {
2919 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2920 		return reply((struct ib_mad_hdr *)pmp);
2921 	}
2922 
2923 	/*
2924 	 * The bit set in the mask needs to be consistent with the
2925 	 * port the request came in on.
2926 	 */
2927 	port_mask = be64_to_cpu(req->port_select_mask[3]);
2928 	port_num = find_first_bit((unsigned long *)&port_mask,
2929 				  sizeof(port_mask) * 8);
2930 
2931 	if (port_num != port) {
2932 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2933 		return reply((struct ib_mad_hdr *)pmp);
2934 	}
2935 
2936 	rsp = &req->port[0];
2937 	memset(rsp, 0, sizeof(*rsp));
2938 
2939 	rsp->port_number = port;
2940 	/*
2941 	 * Note that link_quality_indicator is a 32 bit quantity in
2942 	 * 'datacounters' queries (as opposed to 'portinfo' queries,
2943 	 * where it's a byte).
2944 	 */
2945 	hfi1_read_link_quality(dd, &lq);
2946 	rsp->link_quality_indicator = cpu_to_be32((u32)lq);
2947 	pma_get_opa_port_dctrs(ibdev, rsp);
2948 
2949 	rsp->port_xmit_wait =
2950 		cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2951 	rsp->port_rcv_fecn =
2952 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2953 	rsp->port_rcv_becn =
2954 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2955 	rsp->port_error_counter_summary =
2956 		cpu_to_be64(get_error_counter_summary(ibdev, port,
2957 						      res_lli, res_ler));
2958 
2959 	vlinfo = &rsp->vls[0];
2960 	vfi = 0;
2961 	/* The vl_select_mask has been checked above, and we know
2962 	 * that it contains only entries which represent valid VLs.
2963 	 * So in the for_each_set_bit() loop below, we don't need
2964 	 * any additional checks for vl.
2965 	 */
2966 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2967 			 8 * sizeof(req->vl_select_mask)) {
2968 		memset(vlinfo, 0, sizeof(*vlinfo));
2969 
2970 		rsp->vls[vfi].port_vl_xmit_data =
2971 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2972 						   idx_from_vl(vl)));
2973 
2974 		rsp->vls[vfi].port_vl_rcv_data =
2975 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
2976 						  idx_from_vl(vl)));
2977 
2978 		rsp->vls[vfi].port_vl_xmit_pkts =
2979 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2980 						   idx_from_vl(vl)));
2981 
2982 		rsp->vls[vfi].port_vl_rcv_pkts =
2983 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2984 						  idx_from_vl(vl)));
2985 
2986 		rsp->vls[vfi].port_vl_xmit_wait =
2987 			cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2988 						   idx_from_vl(vl)));
2989 
2990 		rsp->vls[vfi].port_vl_rcv_fecn =
2991 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2992 						  idx_from_vl(vl)));
2993 		rsp->vls[vfi].port_vl_rcv_becn =
2994 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2995 						  idx_from_vl(vl)));
2996 
2997 		/* rsp->port_vl_xmit_time_cong is 0 for HFIs */
2998 		/* rsp->port_vl_xmit_wasted_bw ??? */
2999 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
3000 		 * does this differ from rsp->vls[vfi].port_vl_xmit_wait?
3001 		 */
3002 		/*rsp->vls[vfi].port_vl_mark_fecn =
3003 		 *	cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
3004 		 *		+ offset));
3005 		 */
3006 		vlinfo++;
3007 		vfi++;
3008 	}
3009 
3010 	a0_datacounters(ppd, rsp, vl_select_mask);
3011 
3012 	if (resp_len)
3013 		*resp_len += response_data_size;
3014 
3015 	return reply((struct ib_mad_hdr *)pmp);
3016 }
3017 
3018 static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
3019 				       struct ib_device *ibdev, u8 port)
3020 {
3021 	struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
3022 						pmp->data;
3023 	struct _port_dctrs rsp;
3024 
3025 	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
3026 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3027 		goto bail;
3028 	}
3029 
3030 	memset(&rsp, 0, sizeof(rsp));
3031 	pma_get_opa_port_dctrs(ibdev, &rsp);
3032 
3033 	p->port_xmit_data = rsp.port_xmit_data;
3034 	p->port_rcv_data = rsp.port_rcv_data;
3035 	p->port_xmit_packets = rsp.port_xmit_pkts;
3036 	p->port_rcv_packets = rsp.port_rcv_pkts;
3037 	p->port_unicast_xmit_packets = 0;
3038 	p->port_unicast_rcv_packets =  0;
3039 	p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
3040 	p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
3041 
3042 bail:
3043 	return reply((struct ib_mad_hdr *)pmp);
3044 }
3045 
3046 static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
3047 				   struct _port_ectrs *rsp, u8 port)
3048 {
3049 	u64 tmp, tmp2;
3050 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3051 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3052 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3053 
3054 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
3055 	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3056 					CNTR_INVALID_VL);
3057 	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
3058 		/* overflow/wrapped */
3059 		rsp->link_error_recovery = cpu_to_be32(~0);
3060 	} else {
3061 		rsp->link_error_recovery = cpu_to_be32(tmp2);
3062 	}
3063 
3064 	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
3065 						CNTR_INVALID_VL));
3066 	rsp->port_rcv_errors =
3067 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
3068 	rsp->port_rcv_remote_physical_errors =
3069 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
3070 					  CNTR_INVALID_VL));
3071 	rsp->port_rcv_switch_relay_errors = 0;
3072 	rsp->port_xmit_discards =
3073 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
3074 					   CNTR_INVALID_VL));
3075 	rsp->port_xmit_constraint_errors =
3076 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
3077 					   CNTR_INVALID_VL));
3078 	rsp->port_rcv_constraint_errors =
3079 		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
3080 					   CNTR_INVALID_VL));
3081 	rsp->local_link_integrity_errors =
3082 		cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
3083 					  CNTR_INVALID_VL));
3084 	rsp->excessive_buffer_overruns =
3085 		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
3086 }
3087 
3088 static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
3089 				  struct ib_device *ibdev,
3090 				  u8 port, u32 *resp_len)
3091 {
3092 	size_t response_data_size;
3093 	struct _port_ectrs *rsp;
3094 	u8 port_num;
3095 	struct opa_port_error_counters64_msg *req;
3096 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3097 	u32 num_ports;
3098 	u8 num_pslm;
3099 	u8 num_vls;
3100 	struct hfi1_ibport *ibp;
3101 	struct hfi1_pportdata *ppd;
3102 	struct _vls_ectrs *vlinfo;
3103 	unsigned long vl;
3104 	u64 port_mask, tmp;
3105 	u32 vl_select_mask;
3106 	int vfi;
3107 
3108 	req = (struct opa_port_error_counters64_msg *)pmp->data;
3109 
3110 	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
3111 
3112 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3113 	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
3114 
3115 	if (num_ports != 1 || num_ports != num_pslm) {
3116 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3117 		return reply((struct ib_mad_hdr *)pmp);
3118 	}
3119 
3120 	response_data_size = sizeof(struct opa_port_error_counters64_msg) +
3121 				num_vls * sizeof(struct _vls_ectrs);
3122 
3123 	if (response_data_size > sizeof(pmp->data)) {
3124 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3125 		return reply((struct ib_mad_hdr *)pmp);
3126 	}
3127 	/*
3128 	 * The bit set in the mask needs to be consistent with the
3129 	 * port the request came in on.
3130 	 */
3131 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3132 	port_num = find_first_bit((unsigned long *)&port_mask,
3133 				  sizeof(port_mask) * 8);
3134 
3135 	if (port_num != port) {
3136 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3137 		return reply((struct ib_mad_hdr *)pmp);
3138 	}
3139 
3140 	rsp = &req->port[0];
3141 
3142 	ibp = to_iport(ibdev, port_num);
3143 	ppd = ppd_from_ibp(ibp);
3144 
3145 	memset(rsp, 0, sizeof(*rsp));
3146 	rsp->port_number = port_num;
3147 
3148 	pma_get_opa_port_ectrs(ibdev, rsp, port_num);
3149 
3150 	rsp->port_rcv_remote_physical_errors =
3151 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
3152 					  CNTR_INVALID_VL));
3153 	rsp->fm_config_errors =
3154 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
3155 					  CNTR_INVALID_VL));
3156 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
3157 
3158 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
3159 	rsp->port_rcv_errors =
3160 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
3161 	vlinfo = &rsp->vls[0];
3162 	vfi = 0;
3163 	vl_select_mask = be32_to_cpu(req->vl_select_mask);
3164 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
3165 			 8 * sizeof(req->vl_select_mask)) {
3166 		memset(vlinfo, 0, sizeof(*vlinfo));
3167 		rsp->vls[vfi].port_vl_xmit_discards =
3168 			cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
3169 						   idx_from_vl(vl)));
3170 		vlinfo += 1;
3171 		vfi++;
3172 	}
3173 
3174 	if (resp_len)
3175 		*resp_len += response_data_size;
3176 
3177 	return reply((struct ib_mad_hdr *)pmp);
3178 }
3179 
3180 static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
3181 				   struct ib_device *ibdev, u8 port)
3182 {
3183 	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
3184 		pmp->data;
3185 	struct _port_ectrs rsp;
3186 	u64 temp_link_overrun_errors;
3187 	u64 temp_64;
3188 	u32 temp_32;
3189 
3190 	memset(&rsp, 0, sizeof(rsp));
3191 	pma_get_opa_port_ectrs(ibdev, &rsp, port);
3192 
3193 	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
3194 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3195 		goto bail;
3196 	}
3197 
3198 	p->symbol_error_counter = 0; /* N/A for OPA */
3199 
3200 	temp_32 = be32_to_cpu(rsp.link_error_recovery);
3201 	if (temp_32 > 0xFFUL)
3202 		p->link_error_recovery_counter = 0xFF;
3203 	else
3204 		p->link_error_recovery_counter = (u8)temp_32;
3205 
3206 	temp_32 = be32_to_cpu(rsp.link_downed);
3207 	if (temp_32 > 0xFFUL)
3208 		p->link_downed_counter = 0xFF;
3209 	else
3210 		p->link_downed_counter = (u8)temp_32;
3211 
3212 	temp_64 = be64_to_cpu(rsp.port_rcv_errors);
3213 	if (temp_64 > 0xFFFFUL)
3214 		p->port_rcv_errors = cpu_to_be16(0xFFFF);
3215 	else
3216 		p->port_rcv_errors = cpu_to_be16((u16)temp_64);
3217 
3218 	temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
3219 	if (temp_64 > 0xFFFFUL)
3220 		p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
3221 	else
3222 		p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
3223 
3224 	temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
3225 	p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
3226 
3227 	temp_64 = be64_to_cpu(rsp.port_xmit_discards);
3228 	if (temp_64 > 0xFFFFUL)
3229 		p->port_xmit_discards = cpu_to_be16(0xFFFF);
3230 	else
3231 		p->port_xmit_discards = cpu_to_be16((u16)temp_64);
3232 
3233 	temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
3234 	if (temp_64 > 0xFFUL)
3235 		p->port_xmit_constraint_errors = 0xFF;
3236 	else
3237 		p->port_xmit_constraint_errors = (u8)temp_64;
3238 
3239 	temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
3240 	if (temp_64 > 0xFFUL)
3241 		p->port_rcv_constraint_errors = 0xFFUL;
3242 	else
3243 		p->port_rcv_constraint_errors = (u8)temp_64;
3244 
3245 	/* LocalLink: 7:4, BufferOverrun: 3:0 */
3246 	temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
3247 	if (temp_64 > 0xFUL)
3248 		temp_64 = 0xFUL;
3249 
3250 	temp_link_overrun_errors = temp_64 << 4;
3251 
3252 	temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
3253 	if (temp_64 > 0xFUL)
3254 		temp_64 = 0xFUL;
3255 	temp_link_overrun_errors |= temp_64;
3256 
3257 	p->link_overrun_errors = (u8)temp_link_overrun_errors;
3258 
3259 	p->vl15_dropped = 0; /* N/A for OPA */
3260 
3261 bail:
3262 	return reply((struct ib_mad_hdr *)pmp);
3263 }
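
/*
 * Illustrative sketch, not part of the driver: the width reduction used
 * above when folding 64-bit OPA counters into the narrower legacy IB
 * PortCounters fields.  Anything that does not fit pegs at the field's
 * maximum.
 */
static inline u16 example_clamp_to_u16(u64 val)
{
	return val > 0xFFFFUL ? 0xFFFF : (u16)val;
}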
3264 
3265 static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
3266 				 struct ib_device *ibdev,
3267 				 u8 port, u32 *resp_len)
3268 {
3269 	size_t response_data_size;
3270 	struct _port_ei *rsp;
3271 	struct opa_port_error_info_msg *req;
3272 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3273 	u64 port_mask;
3274 	u32 num_ports;
3275 	u8 port_num;
3276 	u8 num_pslm;
3277 	u64 reg;
3278 
3279 	req = (struct opa_port_error_info_msg *)pmp->data;
3280 	rsp = &req->port[0];
3281 
3282 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3283 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3284 
3285 	memset(rsp, 0, sizeof(*rsp));
3286 
3287 	if (num_ports != 1 || num_ports != num_pslm) {
3288 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3289 		return reply((struct ib_mad_hdr *)pmp);
3290 	}
3291 
3292 	/* Sanity check */
3293 	response_data_size = sizeof(struct opa_port_error_info_msg);
3294 
3295 	if (response_data_size > sizeof(pmp->data)) {
3296 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3297 		return reply((struct ib_mad_hdr *)pmp);
3298 	}
3299 
3300 	/*
3301 	 * The bit set in the mask needs to be consistent with the port
3302 	 * the request came in on.
3303 	 */
3304 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3305 	port_num = find_first_bit((unsigned long *)&port_mask,
3306 				  sizeof(port_mask) * 8);
3307 
3308 	if (port_num != port) {
3309 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3310 		return reply((struct ib_mad_hdr *)pmp);
3311 	}
3312 
3313 	/* PortRcvErrorInfo */
3314 	rsp->port_rcv_ei.status_and_code =
3315 		dd->err_info_rcvport.status_and_code;
3316 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
3317 	       &dd->err_info_rcvport.packet_flit1, sizeof(u64));
3318 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
3319 	       &dd->err_info_rcvport.packet_flit2, sizeof(u64));
3320 
3321 	/* ExcessiveBufferOverrunInfo */
3322 	reg = read_csr(dd, RCV_ERR_INFO);
3323 	if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
3324 		/*
3325 		 * if the RcvExcessBufferOverrun bit is set, save SC of
3326 		 * first pkt that encountered an excess buffer overrun
3327 		 */
3328 		u8 tmp = (u8)reg;
3329 
3330 		tmp &=  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
3331 		tmp <<= 2;
3332 		rsp->excessive_buffer_overrun_ei.status_and_sc = tmp;
3333 		/* set the status bit */
3334 		rsp->excessive_buffer_overrun_ei.status_and_sc |= 0x80;
3335 	}
3336 
3337 	rsp->port_xmit_constraint_ei.status =
3338 		dd->err_info_xmit_constraint.status;
3339 	rsp->port_xmit_constraint_ei.pkey =
3340 		cpu_to_be16(dd->err_info_xmit_constraint.pkey);
3341 	rsp->port_xmit_constraint_ei.slid =
3342 		cpu_to_be32(dd->err_info_xmit_constraint.slid);
3343 
3344 	rsp->port_rcv_constraint_ei.status =
3345 		dd->err_info_rcv_constraint.status;
3346 	rsp->port_rcv_constraint_ei.pkey =
3347 		cpu_to_be16(dd->err_info_rcv_constraint.pkey);
3348 	rsp->port_rcv_constraint_ei.slid =
3349 		cpu_to_be32(dd->err_info_rcv_constraint.slid);
3350 
3351 	/* UncorrectableErrorInfo */
3352 	rsp->uncorrectable_ei.status_and_code = dd->err_info_uncorrectable;
3353 
3354 	/* FMConfigErrorInfo */
3355 	rsp->fm_config_ei.status_and_code = dd->err_info_fmconfig;
3356 
3357 	if (resp_len)
3358 		*resp_len += response_data_size;
3359 
3360 	return reply((struct ib_mad_hdr *)pmp);
3361 }
3362 
3363 static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
3364 				  struct ib_device *ibdev,
3365 				  u8 port, u32 *resp_len)
3366 {
3367 	struct opa_clear_port_status *req =
3368 		(struct opa_clear_port_status *)pmp->data;
3369 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3370 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3371 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3372 	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
3373 	u64 portn = be64_to_cpu(req->port_select_mask[3]);
3374 	u32 counter_select = be32_to_cpu(req->counter_select_mask);
3375 	u32 vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
3376 	unsigned long vl;
3377 
3378 	if ((nports != 1) || (portn != 1 << port)) {
3379 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3380 		return reply((struct ib_mad_hdr *)pmp);
3381 	}
3382 	/*
3383 	 * only counters returned by pma_get_opa_portstatus() are
3384 	 * handled, so when pma_get_opa_portstatus() gets a fix,
3385 	 * the corresponding change should be made here as well.
3386 	 */
3387 
3388 	if (counter_select & CS_PORT_XMIT_DATA)
3389 		write_dev_cntr(dd, C_DC_XMIT_FLITS, CNTR_INVALID_VL, 0);
3390 
3391 	if (counter_select & CS_PORT_RCV_DATA)
3392 		write_dev_cntr(dd, C_DC_RCV_FLITS, CNTR_INVALID_VL, 0);
3393 
3394 	if (counter_select & CS_PORT_XMIT_PKTS)
3395 		write_dev_cntr(dd, C_DC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3396 
3397 	if (counter_select & CS_PORT_RCV_PKTS)
3398 		write_dev_cntr(dd, C_DC_RCV_PKTS, CNTR_INVALID_VL, 0);
3399 
3400 	if (counter_select & CS_PORT_MCAST_XMIT_PKTS)
3401 		write_dev_cntr(dd, C_DC_MC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3402 
3403 	if (counter_select & CS_PORT_MCAST_RCV_PKTS)
3404 		write_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL, 0);
3405 
3406 	if (counter_select & CS_PORT_XMIT_WAIT)
3407 		write_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL, 0);
3408 
3409 	/* ignore cs_sw_portCongestion for HFIs */
3410 
3411 	if (counter_select & CS_PORT_RCV_FECN)
3412 		write_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL, 0);
3413 
3414 	if (counter_select & CS_PORT_RCV_BECN)
3415 		write_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL, 0);
3416 
3417 	/* ignore cs_port_xmit_time_cong for HFIs */
3418 	/* ignore cs_port_xmit_wasted_bw for now */
3419 	/* ignore cs_port_xmit_wait_data for now */
3420 	if (counter_select & CS_PORT_RCV_BUBBLE)
3421 		write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
3422 
3423 	/* Only applicable for switch */
3424 	/* if (counter_select & CS_PORT_MARK_FECN)
3425 	 *	write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
3426 	 */
3427 
3428 	if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
3429 		write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
3430 
3431 	/* ignore cs_port_rcv_switch_relay_errors for HFIs */
3432 	if (counter_select & CS_PORT_XMIT_DISCARDS)
3433 		write_port_cntr(ppd, C_SW_XMIT_DSCD, CNTR_INVALID_VL, 0);
3434 
3435 	if (counter_select & CS_PORT_XMIT_CONSTRAINT_ERRORS)
3436 		write_port_cntr(ppd, C_SW_XMIT_CSTR_ERR, CNTR_INVALID_VL, 0);
3437 
3438 	if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS)
3439 		write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0);
3440 
3441 	if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS)
3442 		write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3443 
3444 	if (counter_select & CS_LINK_ERROR_RECOVERY) {
3445 		write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3446 		write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3447 			       CNTR_INVALID_VL, 0);
3448 	}
3449 
3450 	if (counter_select & CS_PORT_RCV_ERRORS)
3451 		write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3452 
3453 	if (counter_select & CS_EXCESSIVE_BUFFER_OVERRUNS) {
3454 		write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3455 		dd->rcv_ovfl_cnt = 0;
3456 	}
3457 
3458 	if (counter_select & CS_FM_CONFIG_ERRORS)
3459 		write_dev_cntr(dd, C_DC_FM_CFG_ERR, CNTR_INVALID_VL, 0);
3460 
3461 	if (counter_select & CS_LINK_DOWNED)
3462 		write_port_cntr(ppd, C_SW_LINK_DOWN, CNTR_INVALID_VL, 0);
3463 
3464 	if (counter_select & CS_UNCORRECTABLE_ERRORS)
3465 		write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
3466 
3467 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
3468 			 8 * sizeof(vl_select_mask)) {
3469 		if (counter_select & CS_PORT_XMIT_DATA)
3470 			write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
3471 
3472 		if (counter_select & CS_PORT_RCV_DATA)
3473 			write_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl), 0);
3474 
3475 		if (counter_select & CS_PORT_XMIT_PKTS)
3476 			write_port_cntr(ppd, C_TX_PKT_VL, idx_from_vl(vl), 0);
3477 
3478 		if (counter_select & CS_PORT_RCV_PKTS)
3479 			write_dev_cntr(dd, C_DC_RX_PKT_VL, idx_from_vl(vl), 0);
3480 
3481 		if (counter_select & CS_PORT_XMIT_WAIT)
3482 			write_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl), 0);
3483 
3484 		/* sw_port_vl_congestion is 0 for HFIs */
3485 		if (counter_select & CS_PORT_RCV_FECN)
3486 			write_dev_cntr(dd, C_DC_RCV_FCN_VL, idx_from_vl(vl), 0);
3487 
3488 		if (counter_select & CS_PORT_RCV_BECN)
3489 			write_dev_cntr(dd, C_DC_RCV_BCN_VL, idx_from_vl(vl), 0);
3490 
3491 		/* port_vl_xmit_time_cong is 0 for HFIs */
3492 		/* port_vl_xmit_wasted_bw ??? */
3493 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ??? */
3494 		if (counter_select & CS_PORT_RCV_BUBBLE)
3495 			write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
3496 
3497 		/* if (counter_select & CS_PORT_MARK_FECN)
3498 		 *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
3499 		 */
		/* per-VL xmit discards pair with the PortXmitDiscards select bit */
3500 		if (counter_select & CS_PORT_XMIT_DISCARDS)
3501 			write_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
3502 					idx_from_vl(vl), 0);
3503 	}
3504 
3505 	if (resp_len)
3506 		*resp_len += sizeof(*req);
3507 
3508 	return reply((struct ib_mad_hdr *)pmp);
3509 }
3510 
3511 static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
3512 				 struct ib_device *ibdev,
3513 				 u8 port, u32 *resp_len)
3514 {
3515 	struct _port_ei *rsp;
3516 	struct opa_port_error_info_msg *req;
3517 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3518 	u64 port_mask;
3519 	u32 num_ports;
3520 	u8 port_num;
3521 	u8 num_pslm;
3522 	u32 error_info_select;
3523 
3524 	req = (struct opa_port_error_info_msg *)pmp->data;
3525 	rsp = &req->port[0];
3526 
3527 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3528 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3529 
3530 	memset(rsp, 0, sizeof(*rsp));
3531 
3532 	if (num_ports != 1 || num_ports != num_pslm) {
3533 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3534 		return reply((struct ib_mad_hdr *)pmp);
3535 	}
3536 
3537 	/*
3538 	 * The bit set in the mask needs to be consistent with the port
3539 	 * the request came in on.
3540 	 */
3541 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3542 	port_num = find_first_bit((unsigned long *)&port_mask,
3543 				  sizeof(port_mask) * 8);
3544 
3545 	if (port_num != port) {
3546 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3547 		return reply((struct ib_mad_hdr *)pmp);
3548 	}
3549 
3550 	error_info_select = be32_to_cpu(req->error_info_select_mask);
3551 
3552 	/* PortRcvErrorInfo */
3553 	if (error_info_select & ES_PORT_RCV_ERROR_INFO)
3554 		/* turn off status bit */
3555 		dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3556 
3557 	/* ExcessiveBufferOverrunInfo */
3558 	if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
3559 		/*
3560 		 * status bit is essentially kept in the h/w - bit 5 of
3561 		 * RCV_ERR_INFO
3562 		 */
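		/* the status is cleared by writing the bit back (write-1-to-clear) */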
3563 		write_csr(dd, RCV_ERR_INFO,
3564 			  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
3565 
3566 	if (error_info_select & ES_PORT_XMIT_CONSTRAINT_ERROR_INFO)
3567 		dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3568 
3569 	if (error_info_select & ES_PORT_RCV_CONSTRAINT_ERROR_INFO)
3570 		dd->err_info_rcv_constraint.status &= ~OPA_EI_STATUS_SMASK;
3571 
3572 	/* UncorrectableErrorInfo */
3573 	if (error_info_select & ES_UNCORRECTABLE_ERROR_INFO)
3574 		/* turn off status bit */
3575 		dd->err_info_uncorrectable &= ~OPA_EI_STATUS_SMASK;
3576 
3577 	/* FMConfigErrorInfo */
3578 	if (error_info_select & ES_FM_CONFIG_ERROR_INFO)
3579 		/* turn off status bit */
3580 		dd->err_info_fmconfig &= ~OPA_EI_STATUS_SMASK;
3581 
3582 	if (resp_len)
3583 		*resp_len += sizeof(*req);
3584 
3585 	return reply((struct ib_mad_hdr *)pmp);
3586 }
3587 
3588 struct opa_congestion_info_attr {
3589 	__be16 congestion_info;
3590 	u8 control_table_cap;	/* Multiple of 64 entry unit CCTs */
3591 	u8 congestion_log_length;
3592 } __packed;
3593 
3594 static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
3595 				    struct ib_device *ibdev, u8 port,
3596 				    u32 *resp_len, u32 max_len)
3597 {
3598 	struct opa_congestion_info_attr *p =
3599 		(struct opa_congestion_info_attr *)data;
3600 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3601 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3602 
3603 	if (smp_length_check(sizeof(*p), max_len)) {
3604 		smp->status |= IB_SMP_INVALID_FIELD;
3605 		return reply((struct ib_mad_hdr *)smp);
3606 	}
3607 
3608 	p->congestion_info = 0;
3609 	p->control_table_cap = ppd->cc_max_table_entries;
3610 	p->congestion_log_length = OPA_CONG_LOG_ELEMS;
3611 
3612 	if (resp_len)
3613 		*resp_len += sizeof(*p);
3614 
3615 	return reply((struct ib_mad_hdr *)smp);
3616 }
3617 
3618 static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
3619 				       u8 *data, struct ib_device *ibdev,
3620 				       u8 port, u32 *resp_len, u32 max_len)
3621 {
3622 	int i;
3623 	struct opa_congestion_setting_attr *p =
3624 		(struct opa_congestion_setting_attr *)data;
3625 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3626 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3627 	struct opa_congestion_setting_entry_shadow *entries;
3628 	struct cc_state *cc_state;
3629 
3630 	if (smp_length_check(sizeof(*p), max_len)) {
3631 		smp->status |= IB_SMP_INVALID_FIELD;
3632 		return reply((struct ib_mad_hdr *)smp);
3633 	}
3634 
3635 	rcu_read_lock();
3636 
3637 	cc_state = get_cc_state(ppd);
3638 
3639 	if (!cc_state) {
3640 		rcu_read_unlock();
3641 		return reply((struct ib_mad_hdr *)smp);
3642 	}
3643 
3644 	entries = cc_state->cong_setting.entries;
3645 	p->port_control = cpu_to_be16(cc_state->cong_setting.port_control);
3646 	p->control_map = cpu_to_be32(cc_state->cong_setting.control_map);
3647 	for (i = 0; i < OPA_MAX_SLS; i++) {
3648 		p->entries[i].ccti_increase = entries[i].ccti_increase;
3649 		p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
3650 		p->entries[i].trigger_threshold =
3651 			entries[i].trigger_threshold;
3652 		p->entries[i].ccti_min = entries[i].ccti_min;
3653 	}
3654 
3655 	rcu_read_unlock();
3656 
3657 	if (resp_len)
3658 		*resp_len += sizeof(*p);
3659 
3660 	return reply((struct ib_mad_hdr *)smp);
3661 }
3662 
3663 /*
3664  * Apply congestion control information stored in the ppd to the
3665  * active structure.
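 *
 * A new cc_state is allocated, filled from the ppd shadow copies while
 * holding cc_state_lock, and published with rcu_assign_pointer(); the
 * previous state is freed with kfree_rcu() once readers holding it via
 * get_cc_state() under rcu_read_lock() have finished.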
3666  */
3667 static void apply_cc_state(struct hfi1_pportdata *ppd)
3668 {
3669 	struct cc_state *old_cc_state, *new_cc_state;
3670 
3671 	new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
3672 	if (!new_cc_state)
3673 		return;
3674 
3675 	/*
3676 	 * Hold the lock for updating *and* to prevent ppd information
3677 	 * from changing during the update.
3678 	 */
3679 	spin_lock(&ppd->cc_state_lock);
3680 
3681 	old_cc_state = get_cc_state_protected(ppd);
3682 	if (!old_cc_state) {
3683 		/* never active, or shutting down */
3684 		spin_unlock(&ppd->cc_state_lock);
3685 		kfree(new_cc_state);
3686 		return;
3687 	}
3688 
3689 	*new_cc_state = *old_cc_state;
3690 
3691 	new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
3692 	memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
3693 	       ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
3694 
3695 	new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
3696 	new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
3697 	memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
3698 	       OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
3699 
3700 	rcu_assign_pointer(ppd->cc_state, new_cc_state);
3701 
3702 	spin_unlock(&ppd->cc_state_lock);
3703 
3704 	kfree_rcu(old_cc_state, rcu);
3705 }
3706 
3707 static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
3708 				       struct ib_device *ibdev, u8 port,
3709 				       u32 *resp_len, u32 max_len)
3710 {
3711 	struct opa_congestion_setting_attr *p =
3712 		(struct opa_congestion_setting_attr *)data;
3713 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3714 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3715 	struct opa_congestion_setting_entry_shadow *entries;
3716 	int i;
3717 
3718 	if (smp_length_check(sizeof(*p), max_len)) {
3719 		smp->status |= IB_SMP_INVALID_FIELD;
3720 		return reply((struct ib_mad_hdr *)smp);
3721 	}
3722 
3723 	/*
3724 	 * Save details from packet into the ppd.  Hold the cc_state_lock so
3725 	 * our information is consistent with anyone trying to apply the state.
3726 	 */
3727 	spin_lock(&ppd->cc_state_lock);
3728 	ppd->cc_sl_control_map = be32_to_cpu(p->control_map);
3729 
3730 	entries = ppd->congestion_entries;
3731 	for (i = 0; i < OPA_MAX_SLS; i++) {
3732 		entries[i].ccti_increase = p->entries[i].ccti_increase;
3733 		entries[i].ccti_timer = be16_to_cpu(p->entries[i].ccti_timer);
3734 		entries[i].trigger_threshold =
3735 			p->entries[i].trigger_threshold;
3736 		entries[i].ccti_min = p->entries[i].ccti_min;
3737 	}
3738 	spin_unlock(&ppd->cc_state_lock);
3739 
3740 	/* now apply the information */
3741 	apply_cc_state(ppd);
3742 
3743 	return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
3744 					   resp_len, max_len);
3745 }
3746 
3747 static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
3748 					u8 *data, struct ib_device *ibdev,
3749 					u8 port, u32 *resp_len, u32 max_len)
3750 {
3751 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3752 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3753 	struct opa_hfi1_cong_log *cong_log = (struct opa_hfi1_cong_log *)data;
3754 	s64 ts;
3755 	int i;
3756 
3757 	if (am || smp_length_check(sizeof(*cong_log), max_len)) {
3758 		smp->status |= IB_SMP_INVALID_FIELD;
3759 		return reply((struct ib_mad_hdr *)smp);
3760 	}
3761 
3762 	spin_lock_irq(&ppd->cc_log_lock);
3763 
3764 	cong_log->log_type = OPA_CC_LOG_TYPE_HFI;
3765 	cong_log->congestion_flags = 0;
3766 	cong_log->threshold_event_counter =
3767 		cpu_to_be16(ppd->threshold_event_counter);
3768 	memcpy(cong_log->threshold_cong_event_map,
3769 	       ppd->threshold_cong_event_map,
3770 	       sizeof(cong_log->threshold_cong_event_map));
3771 	/* keep timestamp in units of 1.024 usec */
3772 	ts = ktime_to_ns(ktime_get()) / 1024;
3773 	cong_log->current_time_stamp = cpu_to_be32(ts);
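	/*
	 * cc_events is treated as a circular buffer: walk all
	 * OPA_CONG_LOG_ELEMS slots starting at cc_mad_idx, wrapping the
	 * index at the end of the array
	 */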
3774 	for (i = 0; i < OPA_CONG_LOG_ELEMS; i++) {
3775 		struct opa_hfi1_cong_log_event_internal *cce =
3776 			&ppd->cc_events[ppd->cc_mad_idx++];
3777 		if (ppd->cc_mad_idx == OPA_CONG_LOG_ELEMS)
3778 			ppd->cc_mad_idx = 0;
3779 		/*
3780 		 * Entries which are older than twice the time
3781 		 * required to wrap the counter are supposed to
3782 		 * be zeroed (CA10-49 IBTA, release 1.2.1, V1).
3783 		 */
3784 		if ((u64)(ts - cce->timestamp) > (2 * UINT_MAX))
3785 			continue;
3786 		memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
3787 		memcpy(cong_log->events[i].remote_qp_number_cn_entry,
3788 		       &cce->rqpn, 3);
3789 		cong_log->events[i].sl_svc_type_cn_entry =
3790 			((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
3791 		cong_log->events[i].remote_lid_cn_entry =
3792 			cpu_to_be32(cce->rlid);
3793 		cong_log->events[i].timestamp_cn_entry =
3794 			cpu_to_be32(cce->timestamp);
3795 	}
3796 
3797 	/*
3798 	 * Reset threshold_cong_event_map and threshold_event_counter
3799 	 * to 0 when the log is read.
3800 	 */
3801 	memset(ppd->threshold_cong_event_map, 0x0,
3802 	       sizeof(ppd->threshold_cong_event_map));
3803 	ppd->threshold_event_counter = 0;
3804 
3805 	spin_unlock_irq(&ppd->cc_log_lock);
3806 
3807 	if (resp_len)
3808 		*resp_len += sizeof(struct opa_hfi1_cong_log);
3809 
3810 	return reply((struct ib_mad_hdr *)smp);
3811 }
3812 
3813 static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3814 				   struct ib_device *ibdev, u8 port,
3815 				   u32 *resp_len, u32 max_len)
3816 {
3817 	struct ib_cc_table_attr *cc_table_attr =
3818 		(struct ib_cc_table_attr *)data;
3819 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3820 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3821 	u32 start_block = OPA_AM_START_BLK(am);
3822 	u32 n_blocks = OPA_AM_NBLK(am);
3823 	struct ib_cc_table_entry_shadow *entries;
3824 	int i, j;
3825 	u32 sentry, eentry;
3826 	struct cc_state *cc_state;
3827 	u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3828 
3829 	/* sanity check n_blocks, start_block */
3830 	if (n_blocks == 0 || smp_length_check(size, max_len) ||
3831 	    start_block + n_blocks > ppd->cc_max_table_entries) {
3832 		smp->status |= IB_SMP_INVALID_FIELD;
3833 		return reply((struct ib_mad_hdr *)smp);
3834 	}
3835 
3836 	rcu_read_lock();
3837 
3838 	cc_state = get_cc_state(ppd);
3839 
3840 	if (!cc_state) {
3841 		rcu_read_unlock();
3842 		return reply((struct ib_mad_hdr *)smp);
3843 	}
3844 
3845 	sentry = start_block * IB_CCT_ENTRIES;
3846 	eentry = sentry + (IB_CCT_ENTRIES * n_blocks);
3847 
3848 	cc_table_attr->ccti_limit = cpu_to_be16(cc_state->cct.ccti_limit);
3849 
3850 	entries = cc_state->cct.entries;
3851 
3852 	/* return n_blocks, though the last block may not be full */
3853 	for (j = 0, i = sentry; i < eentry; j++, i++)
3854 		cc_table_attr->ccti_entries[j].entry =
3855 			cpu_to_be16(entries[i].entry);
3856 
3857 	rcu_read_unlock();
3858 
3859 	if (resp_len)
3860 		*resp_len += size;
3861 
3862 	return reply((struct ib_mad_hdr *)smp);
3863 }
3864 
3865 static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3866 				   struct ib_device *ibdev, u8 port,
3867 				   u32 *resp_len, u32 max_len)
3868 {
3869 	struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
3870 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3871 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3872 	u32 start_block = OPA_AM_START_BLK(am);
3873 	u32 n_blocks = OPA_AM_NBLK(am);
3874 	struct ib_cc_table_entry_shadow *entries;
3875 	int i, j;
3876 	u32 sentry, eentry;
3877 	u16 ccti_limit;
3878 	u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3879 
3880 	/* sanity check n_blocks, start_block */
3881 	if (n_blocks == 0 || smp_length_check(size, max_len) ||
3882 	    start_block + n_blocks > ppd->cc_max_table_entries) {
3883 		smp->status |= IB_SMP_INVALID_FIELD;
3884 		return reply((struct ib_mad_hdr *)smp);
3885 	}
3886 
3887 	sentry = start_block * IB_CCT_ENTRIES;
3888 	eentry = sentry + ((n_blocks - 1) * IB_CCT_ENTRIES) +
3889 		 (be16_to_cpu(p->ccti_limit)) % IB_CCT_ENTRIES + 1;
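	/*
	 * the last block may be only partially populated; ccti_limit
	 * (modulo IB_CCT_ENTRIES) bounds how far into it we go
	 */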
3890 
3891 	/* sanity check ccti_limit */
3892 	ccti_limit = be16_to_cpu(p->ccti_limit);
3893 	if (ccti_limit + 1 > eentry) {
3894 		smp->status |= IB_SMP_INVALID_FIELD;
3895 		return reply((struct ib_mad_hdr *)smp);
3896 	}
3897 
3898 	/*
3899 	 * Save details from packet into the ppd.  Hold the cc_state_lock so
3900 	 * our information is consistent with anyone trying to apply the state.
3901 	 */
3902 	spin_lock(&ppd->cc_state_lock);
3903 	ppd->total_cct_entry = ccti_limit + 1;
3904 	entries = ppd->ccti_entries;
3905 	for (j = 0, i = sentry; i < eentry; j++, i++)
3906 		entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry);
3907 	spin_unlock(&ppd->cc_state_lock);
3908 
3909 	/* now apply the information */
3910 	apply_cc_state(ppd);
3911 
3912 	return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len,
3913 				       max_len);
3914 }
3915 
3916 struct opa_led_info {
3917 	__be32 rsvd_led_mask;
3918 	__be32 rsvd;
3919 };
3920 
3921 #define OPA_LED_SHIFT	31
3922 #define OPA_LED_MASK	BIT(OPA_LED_SHIFT)
3923 
3924 static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3925 				   struct ib_device *ibdev, u8 port,
3926 				   u32 *resp_len, u32 max_len)
3927 {
3928 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3929 	struct hfi1_pportdata *ppd = dd->pport;
3930 	struct opa_led_info *p = (struct opa_led_info *)data;
3931 	u32 nport = OPA_AM_NPORT(am);
3932 	u32 is_beaconing_active;
3933 
3934 	if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
3935 		smp->status |= IB_SMP_INVALID_FIELD;
3936 		return reply((struct ib_mad_hdr *)smp);
3937 	}
3938 
3939 	/*
3940 	 * This pairs with the memory barrier in hfi1_start_led_override to
3941 	 * ensure that we read the correct state of LED beaconing represented
3942 	 * by led_override_timer_active
3943 	 */
3944 	smp_rmb();
3945 	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
3946 	p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
3947 
3948 	if (resp_len)
3949 		*resp_len += sizeof(struct opa_led_info);
3950 
3951 	return reply((struct ib_mad_hdr *)smp);
3952 }
3953 
3954 static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3955 				   struct ib_device *ibdev, u8 port,
3956 				   u32 *resp_len, u32 max_len)
3957 {
3958 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3959 	struct opa_led_info *p = (struct opa_led_info *)data;
3960 	u32 nport = OPA_AM_NPORT(am);
3961 	int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
3962 
3963 	if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
3964 		smp->status |= IB_SMP_INVALID_FIELD;
3965 		return reply((struct ib_mad_hdr *)smp);
3966 	}
3967 
3968 	if (on)
3969 		hfi1_start_led_override(dd->pport, 2000, 1500);
3970 	else
3971 		shutdown_led_override(dd->pport);
3972 
3973 	return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len,
3974 				       max_len);
3975 }
3976 
3977 static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
3978 			    u8 *data, struct ib_device *ibdev, u8 port,
3979 			    u32 *resp_len, u32 max_len)
3980 {
3981 	int ret;
3982 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3983 
3984 	switch (attr_id) {
3985 	case IB_SMP_ATTR_NODE_DESC:
3986 		ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
3987 					      resp_len, max_len);
3988 		break;
3989 	case IB_SMP_ATTR_NODE_INFO:
3990 		ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
3991 					      resp_len, max_len);
3992 		break;
3993 	case IB_SMP_ATTR_PORT_INFO:
3994 		ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
3995 					      resp_len, max_len);
3996 		break;
3997 	case IB_SMP_ATTR_PKEY_TABLE:
3998 		ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
3999 					       resp_len, max_len);
4000 		break;
4001 	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
4002 		ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
4003 					      resp_len, max_len);
4004 		break;
4005 	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
4006 		ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
4007 					      resp_len, max_len);
4008 		break;
4009 	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
4010 		ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
4011 					       resp_len, max_len);
4012 		break;
4013 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
4014 		ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
4015 						resp_len, max_len);
4016 		break;
4017 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
4018 		ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
4019 					 resp_len, max_len);
4020 		break;
4021 	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
4022 		ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
4023 					 resp_len, max_len);
4024 		break;
4025 	case OPA_ATTRIB_ID_CABLE_INFO:
4026 		ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
4027 						resp_len, max_len);
4028 		break;
4029 	case IB_SMP_ATTR_VL_ARB_TABLE:
4030 		ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
4031 					    resp_len, max_len);
4032 		break;
4033 	case OPA_ATTRIB_ID_CONGESTION_INFO:
4034 		ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
4035 					       resp_len, max_len);
4036 		break;
4037 	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
4038 		ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
4039 						  port, resp_len, max_len);
4040 		break;
4041 	case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
4042 		ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
4043 						   port, resp_len, max_len);
4044 		break;
4045 	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
4046 		ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
4047 					      resp_len, max_len);
4048 		break;
4049 	case IB_SMP_ATTR_LED_INFO:
4050 		ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
4051 					      resp_len, max_len);
4052 		break;
4053 	case IB_SMP_ATTR_SM_INFO:
4054 		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
4055 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
4056 		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
4057 			return IB_MAD_RESULT_SUCCESS;
4058 		/* FALLTHROUGH */
4059 	default:
4060 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
4061 		ret = reply((struct ib_mad_hdr *)smp);
4062 		break;
4063 	}
4064 	return ret;
4065 }
4066 
4067 static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
4068 			    u8 *data, struct ib_device *ibdev, u8 port,
4069 			    u32 *resp_len, u32 max_len)
4070 {
4071 	int ret;
4072 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4073 
4074 	switch (attr_id) {
4075 	case IB_SMP_ATTR_PORT_INFO:
4076 		ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
4077 					      resp_len, max_len);
4078 		break;
4079 	case IB_SMP_ATTR_PKEY_TABLE:
4080 		ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
4081 					       resp_len, max_len);
4082 		break;
4083 	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
4084 		ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
4085 					      resp_len, max_len);
4086 		break;
4087 	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
4088 		ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
4089 					      resp_len, max_len);
4090 		break;
4091 	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
4092 		ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
4093 					       resp_len, max_len);
4094 		break;
4095 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
4096 		ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
4097 						resp_len, max_len);
4098 		break;
4099 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
4100 		ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
4101 					 resp_len, max_len);
4102 		break;
4103 	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
4104 		ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
4105 					 resp_len, max_len);
4106 		break;
4107 	case IB_SMP_ATTR_VL_ARB_TABLE:
4108 		ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
4109 					    resp_len, max_len);
4110 		break;
4111 	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
4112 		ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
4113 						  port, resp_len, max_len);
4114 		break;
4115 	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
4116 		ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
4117 					      resp_len, max_len);
4118 		break;
4119 	case IB_SMP_ATTR_LED_INFO:
4120 		ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
4121 					      resp_len, max_len);
4122 		break;
4123 	case IB_SMP_ATTR_SM_INFO:
4124 		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
4125 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
4126 		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
4127 			return IB_MAD_RESULT_SUCCESS;
4128 		/* FALLTHROUGH */
4129 	default:
4130 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
4131 		ret = reply((struct ib_mad_hdr *)smp);
4132 		break;
4133 	}
4134 	return ret;
4135 }
4136 
4137 static inline void set_aggr_error(struct opa_aggregate *ag)
4138 {
4139 	ag->err_reqlength |= cpu_to_be16(0x8000);
4140 }
4141 
4142 static int subn_get_opa_aggregate(struct opa_smp *smp,
4143 				  struct ib_device *ibdev, u8 port,
4144 				  u32 *resp_len)
4145 {
4146 	int i;
4147 	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
4148 	u8 *next_smp = opa_get_smp_data(smp);
4149 
4150 	if (num_attr < 1 || num_attr > 117) {
4151 		smp->status |= IB_SMP_INVALID_FIELD;
4152 		return reply((struct ib_mad_hdr *)smp);
4153 	}
4154 
4155 	for (i = 0; i < num_attr; i++) {
4156 		struct opa_aggregate *agg;
4157 		size_t agg_data_len;
4158 		size_t agg_size;
4159 		u32 am;
4160 
4161 		agg = (struct opa_aggregate *)next_smp;
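		/*
		 * err_reqlength: bits 6:0 give this segment's payload length
		 * in 8-byte units; bit 15 is the per-segment error flag set
		 * by set_aggr_error()
		 */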
4162 		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
4163 		agg_size = sizeof(*agg) + agg_data_len;
4164 		am = be32_to_cpu(agg->attr_mod);
4165 
4166 		*resp_len += agg_size;
4167 
4168 		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
4169 			smp->status |= IB_SMP_INVALID_FIELD;
4170 			return reply((struct ib_mad_hdr *)smp);
4171 		}
4172 
4173 		/* zero the payload for this segment */
4174 		memset(next_smp + sizeof(*agg), 0, agg_data_len);
4175 
4176 		(void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
4177 				       ibdev, port, NULL, (u32)agg_data_len);
4178 
4179 		if (smp->status & IB_SMP_INVALID_FIELD)
4180 			break;
4181 		if (smp->status & ~IB_SMP_DIRECTION) {
4182 			set_aggr_error(agg);
4183 			return reply((struct ib_mad_hdr *)smp);
4184 		}
4185 		next_smp += agg_size;
4186 	}
4187 
4188 	return reply((struct ib_mad_hdr *)smp);
4189 }
4190 
4191 static int subn_set_opa_aggregate(struct opa_smp *smp,
4192 				  struct ib_device *ibdev, u8 port,
4193 				  u32 *resp_len)
4194 {
4195 	int i;
4196 	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
4197 	u8 *next_smp = opa_get_smp_data(smp);
4198 
4199 	if (num_attr < 1 || num_attr > 117) {
4200 		smp->status |= IB_SMP_INVALID_FIELD;
4201 		return reply((struct ib_mad_hdr *)smp);
4202 	}
4203 
4204 	for (i = 0; i < num_attr; i++) {
4205 		struct opa_aggregate *agg;
4206 		size_t agg_data_len;
4207 		size_t agg_size;
4208 		u32 am;
4209 
4210 		agg = (struct opa_aggregate *)next_smp;
4211 		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
4212 		agg_size = sizeof(*agg) + agg_data_len;
4213 		am = be32_to_cpu(agg->attr_mod);
4214 
4215 		*resp_len += agg_size;
4216 
4217 		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
4218 			smp->status |= IB_SMP_INVALID_FIELD;
4219 			return reply((struct ib_mad_hdr *)smp);
4220 		}
4221 
4222 		(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
4223 				       ibdev, port, NULL, (u32)agg_data_len);
4224 		if (smp->status & IB_SMP_INVALID_FIELD)
4225 			break;
4226 		if (smp->status & ~IB_SMP_DIRECTION) {
4227 			set_aggr_error(agg);
4228 			return reply((struct ib_mad_hdr *)smp);
4229 		}
4230 		next_smp += agg_size;
4231 	}
4232 
4233 	return reply((struct ib_mad_hdr *)smp);
4234 }
4235 
4236 /*
4237  * OPAv1 specifies that, on the transition to link up, these counters
4238  * are cleared:
4239  *   PortRcvErrors [*]
4240  *   LinkErrorRecovery
4241  *   LocalLinkIntegrityErrors
4242  *   ExcessiveBufferOverruns [*]
4243  *
4244  * [*] Error info associated with these counters is retained, but the
4245  * error info status is reset to 0.
4246  */
4247 void clear_linkup_counters(struct hfi1_devdata *dd)
4248 {
4249 	/* PortRcvErrors */
4250 	write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
4251 	dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
4252 	/* LinkErrorRecovery */
4253 	write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
4254 	write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0);
4255 	/* LocalLinkIntegrityErrors */
4256 	write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
4257 	/* ExcessiveBufferOverruns */
4258 	write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
4259 	dd->rcv_ovfl_cnt = 0;
4260 	dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
4261 }
4262 
4263 /*
4264  * is_local_mad() returns 1 if 'mad' is sent from, and destined to, the
4265  * local node; 0 otherwise.
4266  */
4267 static int is_local_mad(struct hfi1_ibport *ibp, const struct opa_mad *mad,
4268 			const struct ib_wc *in_wc)
4269 {
4270 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
4271 	const struct opa_smp *smp = (const struct opa_smp *)mad;
4272 
4273 	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
4274 		return (smp->hop_cnt == 0 &&
4275 			smp->route.dr.dr_slid == OPA_LID_PERMISSIVE &&
4276 			smp->route.dr.dr_dlid == OPA_LID_PERMISSIVE);
4277 	}
4278 
4279 	return (in_wc->slid == ppd->lid);
4280 }
4281 
4282 /*
4283  * opa_local_smp_check() should only be called on MADs for which
4284  * is_local_mad() returns true. It applies the SMP checks that are
4285  * specific to SMPs which are sent from, and destined to this node.
4286  * opa_local_smp_check() returns 0 if the SMP passes its checks, 1
4287  * otherwise.
4288  *
4289  * SMPs which arrive from other nodes are instead checked by
4290  * opa_smp_check().
4291  */
4292 static int opa_local_smp_check(struct hfi1_ibport *ibp,
4293 			       const struct ib_wc *in_wc)
4294 {
4295 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
4296 	u16 slid = ib_lid_cpu16(in_wc->slid);
4297 	u16 pkey;
4298 
4299 	if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys))
4300 		return 1;
4301 
4302 	pkey = ppd->pkeys[in_wc->pkey_index];
4303 	/*
4304 	 * We need to do the "node-local" checks specified in OPAv1,
4305 	 * rev 0.90, section 9.10.26, which are:
4306 	 *   - pkey is 0x7fff, or 0xffff
4307 	 *   - Source QPN == 0 || Destination QPN == 0
4308 	 *   - the MAD header's management class is either
4309 	 *     IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE or
4310 	 *     IB_MGMT_CLASS_SUBN_LID_ROUTED
4311 	 *   - SLID != 0
4312 	 *
4313 	 * However, we know (and so don't need to check again) that,
4314 	 * for local SMPs, the MAD stack passes MADs with:
4315 	 *   - Source QPN of 0
4316 	 *   - MAD mgmt_class is IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4317 	 *   - SLID is either: OPA_LID_PERMISSIVE (0xFFFFFFFF), or
4318 	 *     our own port's lid
4319 	 *
4320 	 */
4321 	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
4322 		return 0;
4323 	ingress_pkey_table_fail(ppd, pkey, slid);
4324 	return 1;
4325 }
4326 
4327 static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
4328 			    u8 port, const struct opa_mad *in_mad,
4329 			    struct opa_mad *out_mad,
4330 			    u32 *resp_len)
4331 {
4332 	struct opa_smp *smp = (struct opa_smp *)out_mad;
4333 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4334 	u8 *data;
4335 	u32 am, data_size;
4336 	__be16 attr_id;
4337 	int ret;
4338 
4339 	*out_mad = *in_mad;
4340 	data = opa_get_smp_data(smp);
4341 	data_size = (u32)opa_get_smp_data_size(smp);
4342 
4343 	am = be32_to_cpu(smp->attr_mod);
4344 	attr_id = smp->attr_id;
4345 	if (smp->class_version != OPA_SM_CLASS_VERSION) {
4346 		smp->status |= IB_SMP_UNSUP_VERSION;
4347 		ret = reply((struct ib_mad_hdr *)smp);
4348 		return ret;
4349 	}
4350 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
4351 			 smp->route.dr.dr_slid, smp->route.dr.return_path,
4352 			 smp->hop_cnt);
4353 	if (ret) {
4354 		u32 port_num = be32_to_cpu(smp->attr_mod);
4355 
4356 		/*
4357 		 * If this is a get/set portinfo, we already check the
4358 		 * M_Key if the MAD is for another port and the M_Key
4359 		 * is OK on the receiving port. This check is needed
4360 		 * to increment the error counters when the M_Key
4361 		 * fails to match on *both* ports.
4362 		 */
4363 		if (attr_id == IB_SMP_ATTR_PORT_INFO &&
4364 		    (smp->method == IB_MGMT_METHOD_GET ||
4365 		     smp->method == IB_MGMT_METHOD_SET) &&
4366 		    port_num && port_num <= ibdev->phys_port_cnt &&
4367 		    port != port_num)
4368 			(void)check_mkey(to_iport(ibdev, port_num),
4369 					  (struct ib_mad_hdr *)smp, 0,
4370 					  smp->mkey, smp->route.dr.dr_slid,
4371 					  smp->route.dr.return_path,
4372 					  smp->hop_cnt);
4373 		ret = IB_MAD_RESULT_FAILURE;
4374 		return ret;
4375 	}
4376 
4377 	*resp_len = opa_get_smp_header_size(smp);
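	/* each attribute handler adds its payload size on top of the header */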
4378 
4379 	switch (smp->method) {
4380 	case IB_MGMT_METHOD_GET:
4381 		switch (attr_id) {
4382 		default:
4383 			clear_opa_smp_data(smp);
4384 			ret = subn_get_opa_sma(attr_id, smp, am, data,
4385 					       ibdev, port, resp_len,
4386 					       data_size);
4387 			break;
4388 		case OPA_ATTRIB_ID_AGGREGATE:
4389 			ret = subn_get_opa_aggregate(smp, ibdev, port,
4390 						     resp_len);
4391 			break;
4392 		}
4393 		break;
4394 	case IB_MGMT_METHOD_SET:
4395 		switch (attr_id) {
4396 		default:
4397 			ret = subn_set_opa_sma(attr_id, smp, am, data,
4398 					       ibdev, port, resp_len,
4399 					       data_size);
4400 			break;
4401 		case OPA_ATTRIB_ID_AGGREGATE:
4402 			ret = subn_set_opa_aggregate(smp, ibdev, port,
4403 						     resp_len);
4404 			break;
4405 		}
4406 		break;
4407 	case IB_MGMT_METHOD_TRAP:
4408 	case IB_MGMT_METHOD_REPORT:
4409 	case IB_MGMT_METHOD_REPORT_RESP:
4410 	case IB_MGMT_METHOD_GET_RESP:
4411 		/*
4412 		 * The ib_mad module will call us to process responses
4413 		 * before checking for other consumers.
4414 		 * Just tell the caller to process it normally.
4415 		 */
4416 		ret = IB_MAD_RESULT_SUCCESS;
4417 		break;
4418 	case IB_MGMT_METHOD_TRAP_REPRESS:
4419 		subn_handle_opa_trap_repress(ibp, smp);
4420 		/* Always successful */
4421 		ret = IB_MAD_RESULT_SUCCESS;
4422 		break;
4423 	default:
4424 		smp->status |= IB_SMP_UNSUP_METHOD;
4425 		ret = reply((struct ib_mad_hdr *)smp);
4426 		break;
4427 	}
4428 
4429 	return ret;
4430 }
4431 
4432 static int process_subn(struct ib_device *ibdev, int mad_flags,
4433 			u8 port, const struct ib_mad *in_mad,
4434 			struct ib_mad *out_mad)
4435 {
4436 	struct ib_smp *smp = (struct ib_smp *)out_mad;
4437 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4438 	int ret;
4439 
4440 	*out_mad = *in_mad;
4441 	if (smp->class_version != 1) {
4442 		smp->status |= IB_SMP_UNSUP_VERSION;
4443 		ret = reply((struct ib_mad_hdr *)smp);
4444 		return ret;
4445 	}
4446 
4447 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
4448 			 smp->mkey, (__force __be32)smp->dr_slid,
4449 			 smp->return_path, smp->hop_cnt);
4450 	if (ret) {
4451 		u32 port_num = be32_to_cpu(smp->attr_mod);
4452 
4453 		/*
4454 		 * If this is a get/set portinfo, we already check the
4455 		 * M_Key if the MAD is for another port and the M_Key
4456 		 * is OK on the receiving port. This check is needed
4457 		 * to increment the error counters when the M_Key
4458 		 * fails to match on *both* ports.
4459 		 */
4460 		if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
4461 		    (smp->method == IB_MGMT_METHOD_GET ||
4462 		     smp->method == IB_MGMT_METHOD_SET) &&
4463 		    port_num && port_num <= ibdev->phys_port_cnt &&
4464 		    port != port_num)
4465 			(void)check_mkey(to_iport(ibdev, port_num),
4466 					 (struct ib_mad_hdr *)smp, 0,
4467 					 smp->mkey,
4468 					 (__force __be32)smp->dr_slid,
4469 					 smp->return_path, smp->hop_cnt);
4470 		ret = IB_MAD_RESULT_FAILURE;
4471 		return ret;
4472 	}
4473 
4474 	switch (smp->method) {
4475 	case IB_MGMT_METHOD_GET:
4476 		switch (smp->attr_id) {
4477 		case IB_SMP_ATTR_NODE_INFO:
4478 			ret = subn_get_nodeinfo(smp, ibdev, port);
4479 			break;
4480 		default:
4481 			smp->status |= IB_SMP_UNSUP_METH_ATTR;
4482 			ret = reply((struct ib_mad_hdr *)smp);
4483 			break;
4484 		}
4485 		break;
4486 	}
4487 
4488 	return ret;
4489 }
4490 
4491 static int process_perf(struct ib_device *ibdev, u8 port,
4492 			const struct ib_mad *in_mad,
4493 			struct ib_mad *out_mad)
4494 {
4495 	struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
4496 	struct ib_class_port_info *cpi = (struct ib_class_port_info *)
4497 						&pmp->data;
4498 	int ret = IB_MAD_RESULT_FAILURE;
4499 
4500 	*out_mad = *in_mad;
4501 	if (pmp->mad_hdr.class_version != 1) {
4502 		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4503 		ret = reply((struct ib_mad_hdr *)pmp);
4504 		return ret;
4505 	}
4506 
4507 	switch (pmp->mad_hdr.method) {
4508 	case IB_MGMT_METHOD_GET:
4509 		switch (pmp->mad_hdr.attr_id) {
4510 		case IB_PMA_PORT_COUNTERS:
4511 			ret = pma_get_ib_portcounters(pmp, ibdev, port);
4512 			break;
4513 		case IB_PMA_PORT_COUNTERS_EXT:
4514 			ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
4515 			break;
4516 		case IB_PMA_CLASS_PORT_INFO:
4517 			cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
4518 			ret = reply((struct ib_mad_hdr *)pmp);
4519 			break;
4520 		default:
4521 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4522 			ret = reply((struct ib_mad_hdr *)pmp);
4523 			break;
4524 		}
4525 		break;
4526 
4527 	case IB_MGMT_METHOD_SET:
4528 		if (pmp->mad_hdr.attr_id) {
4529 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4530 			ret = reply((struct ib_mad_hdr *)pmp);
4531 		}
4532 		break;
4533 
4534 	case IB_MGMT_METHOD_TRAP:
4535 	case IB_MGMT_METHOD_GET_RESP:
4536 		/*
4537 		 * The ib_mad module will call us to process responses
4538 		 * before checking for other consumers.
4539 		 * Just tell the caller to process it normally.
4540 		 */
4541 		ret = IB_MAD_RESULT_SUCCESS;
4542 		break;
4543 
4544 	default:
4545 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4546 		ret = reply((struct ib_mad_hdr *)pmp);
4547 		break;
4548 	}
4549 
4550 	return ret;
4551 }
4552 
4553 static int process_perf_opa(struct ib_device *ibdev, u8 port,
4554 			    const struct opa_mad *in_mad,
4555 			    struct opa_mad *out_mad, u32 *resp_len)
4556 {
4557 	struct opa_pma_mad *pmp = (struct opa_pma_mad *)out_mad;
4558 	int ret;
4559 
4560 	*out_mad = *in_mad;
4561 
4562 	if (pmp->mad_hdr.class_version != OPA_SM_CLASS_VERSION) {
4563 		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4564 		return reply((struct ib_mad_hdr *)pmp);
4565 	}
4566 
4567 	*resp_len = sizeof(pmp->mad_hdr);
4568 
4569 	switch (pmp->mad_hdr.method) {
4570 	case IB_MGMT_METHOD_GET:
4571 		switch (pmp->mad_hdr.attr_id) {
4572 		case IB_PMA_CLASS_PORT_INFO:
4573 			ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
4574 			break;
4575 		case OPA_PM_ATTRIB_ID_PORT_STATUS:
4576 			ret = pma_get_opa_portstatus(pmp, ibdev, port,
4577 						     resp_len);
4578 			break;
4579 		case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
4580 			ret = pma_get_opa_datacounters(pmp, ibdev, port,
4581 						       resp_len);
4582 			break;
4583 		case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
4584 			ret = pma_get_opa_porterrors(pmp, ibdev, port,
4585 						     resp_len);
4586 			break;
4587 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
4588 			ret = pma_get_opa_errorinfo(pmp, ibdev, port,
4589 						    resp_len);
4590 			break;
4591 		default:
4592 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4593 			ret = reply((struct ib_mad_hdr *)pmp);
4594 			break;
4595 		}
4596 		break;
4597 
4598 	case IB_MGMT_METHOD_SET:
4599 		switch (pmp->mad_hdr.attr_id) {
4600 		case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
4601 			ret = pma_set_opa_portstatus(pmp, ibdev, port,
4602 						     resp_len);
4603 			break;
4604 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
4605 			ret = pma_set_opa_errorinfo(pmp, ibdev, port,
4606 						    resp_len);
4607 			break;
4608 		default:
4609 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4610 			ret = reply((struct ib_mad_hdr *)pmp);
4611 			break;
4612 		}
4613 		break;
4614 
4615 	case IB_MGMT_METHOD_TRAP:
4616 	case IB_MGMT_METHOD_GET_RESP:
4617 		/*
4618 		 * The ib_mad module will call us to process responses
4619 		 * before checking for other consumers.
4620 		 * Just tell the caller to process it normally.
4621 		 */
4622 		ret = IB_MAD_RESULT_SUCCESS;
4623 		break;
4624 
4625 	default:
4626 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4627 		ret = reply((struct ib_mad_hdr *)pmp);
4628 		break;
4629 	}
4630 
4631 	return ret;
4632 }
4633 
4634 static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
4635 				u8 port, const struct ib_wc *in_wc,
4636 				const struct ib_grh *in_grh,
4637 				const struct opa_mad *in_mad,
4638 				struct opa_mad *out_mad, size_t *out_mad_size,
4639 				u16 *out_mad_pkey_index)
4640 {
4641 	int ret;
4642 	int pkey_idx;
4643 	u32 resp_len = 0;
4644 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4645 
4646 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
4647 	if (pkey_idx < 0) {
4648 		pr_warn("failed to find limited mgmt pkey, defaulting 0x%x\n",
4649 			hfi1_get_pkey(ibp, 1));
4650 		pkey_idx = 1;
4651 	}
4652 	*out_mad_pkey_index = (u16)pkey_idx;
4653 
4654 	switch (in_mad->mad_hdr.mgmt_class) {
4655 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4656 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4657 		if (is_local_mad(ibp, in_mad, in_wc)) {
4658 			ret = opa_local_smp_check(ibp, in_wc);
4659 			if (ret)
4660 				return IB_MAD_RESULT_FAILURE;
4661 		}
4662 		ret = process_subn_opa(ibdev, mad_flags, port, in_mad,
4663 				       out_mad, &resp_len);
4664 		goto bail;
4665 	case IB_MGMT_CLASS_PERF_MGMT:
4666 		ret = process_perf_opa(ibdev, port, in_mad, out_mad,
4667 				       &resp_len);
4668 		goto bail;
4669 
4670 	default:
4671 		ret = IB_MAD_RESULT_SUCCESS;
4672 	}
4673 
4674 bail:
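	/*
	 * replies are rounded up to an 8-byte multiple; otherwise the
	 * incoming MAD size (minus the GRH) is passed back unchanged
	 */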
4675 	if (ret & IB_MAD_RESULT_REPLY)
4676 		*out_mad_size = round_up(resp_len, 8);
4677 	else if (ret & IB_MAD_RESULT_SUCCESS)
4678 		*out_mad_size = in_wc->byte_len - sizeof(struct ib_grh);
4679 
4680 	return ret;
4681 }
4682 
4683 static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4684 			       const struct ib_wc *in_wc,
4685 			       const struct ib_grh *in_grh,
4686 			       const struct ib_mad *in_mad,
4687 			       struct ib_mad *out_mad)
4688 {
4689 	int ret;
4690 
4691 	switch (in_mad->mad_hdr.mgmt_class) {
4692 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4693 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4694 		ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
4695 		break;
4696 	case IB_MGMT_CLASS_PERF_MGMT:
4697 		ret = process_perf(ibdev, port, in_mad, out_mad);
4698 		break;
4699 	default:
4700 		ret = IB_MAD_RESULT_SUCCESS;
4701 		break;
4702 	}
4703 
4704 	return ret;
4705 }
4706 
4707 /**
4708  * hfi1_process_mad - process an incoming MAD packet
4709  * @ibdev: the infiniband device this packet came in on
4710  * @mad_flags: MAD flags
4711  * @port: the port number this packet came in on
4712  * @in_wc: the work completion entry for this packet
4713  * @in_grh: the global route header for this packet
4714  * @in_mad: the incoming MAD
4715  * @out_mad: any outgoing MAD reply
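 * @in_mad_size: size of the incoming MAD
 * @out_mad_size: size of the outgoing MAD reply
 * @out_mad_pkey_index: P_Key index for the outgoing MAD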
4716  *
4717  * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
4718  * interested in processing.
4719  *
4720  * Note that the verbs framework has already done the MAD sanity checks,
4721  * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4722  * MADs.
4723  *
4724  * This is called by the ib_mad module.
4725  */
4726 int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4727 		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
4728 		     const struct ib_mad_hdr *in_mad, size_t in_mad_size,
4729 		     struct ib_mad_hdr *out_mad, size_t *out_mad_size,
4730 		     u16 *out_mad_pkey_index)
4731 {
4732 	switch (in_mad->base_version) {
4733 	case OPA_MGMT_BASE_VERSION:
4734 		if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
4735 			dev_err(ibdev->dev.parent, "invalid in_mad_size\n");
4736 			return IB_MAD_RESULT_FAILURE;
4737 		}
4738 		return hfi1_process_opa_mad(ibdev, mad_flags, port,
4739 					    in_wc, in_grh,
4740 					    (struct opa_mad *)in_mad,
4741 					    (struct opa_mad *)out_mad,
4742 					    out_mad_size,
4743 					    out_mad_pkey_index);
4744 	case IB_MGMT_BASE_VERSION:
4745 		return hfi1_process_ib_mad(ibdev, mad_flags, port,
4746 					  in_wc, in_grh,
4747 					  (const struct ib_mad *)in_mad,
4748 					  (struct ib_mad *)out_mad);
4749 	default:
4750 		break;
4751 	}
4752 
4753 	return IB_MAD_RESULT_FAILURE;
4754 }
4755