xref: /openbmc/linux/drivers/infiniband/hw/hfi1/mad.c (revision 979ac5ef)
1 // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
2 /*
3  * Copyright(c) 2015-2018 Intel Corporation.
4  */
5 
6 #include <linux/net.h>
7 #include <rdma/opa_addr.h>
/*
 * Number of P_Key table blocks that fit in OPA_SMP_DR_DATA_SIZE bytes;
 * one block is OPA_PARTITION_TABLE_BLK_SIZE 16-bit entries.
 */
#define OPA_NUM_PKEY_BLOCKS_PER_SMP \
	(OPA_SMP_DR_DATA_SIZE / (OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16)))
10 
11 #include "hfi.h"
12 #include "mad.h"
13 #include "trace.h"
14 #include "qp.h"
15 #include "vnic.h"
16 
/*
 * Link-width "reset" values.  The FM is supposed to send 0xffff, but the
 * older 0x0fff value is handled as well.
 */
#define OPA_LINK_WIDTH_RESET_OLD	0x0fff
#define OPA_LINK_WIDTH_RESET		0xffff
20 
21 struct trap_node {
22 	struct list_head list;
23 	struct opa_mad_notice_attr data;
24 	__be64 tid;
25 	int len;
26 	u32 retry;
27 	u8 in_use;
28 	u8 repress;
29 };
30 
31 static int smp_length_check(u32 data_size, u32 request_len)
32 {
33 	if (unlikely(request_len < data_size))
34 		return -EINVAL;
35 
36 	return 0;
37 }
38 
39 static int reply(struct ib_mad_hdr *smp)
40 {
41 	/*
42 	 * The verbs framework will handle the directed/LID route
43 	 * packet changes.
44 	 */
45 	smp->method = IB_MGMT_METHOD_GET_RESP;
46 	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
47 		smp->status |= IB_SMP_DIRECTION;
48 	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
49 }
50 
/* Zero the whole data area of @smp, as reported by the opa_get_smp_* helpers. */
static inline void clear_opa_smp_data(struct opa_smp *smp)
{
	memset(opa_get_smp_data(smp), 0, opa_get_smp_data_size(smp));
}
58 
59 static u16 hfi1_lookup_pkey_value(struct hfi1_ibport *ibp, int pkey_idx)
60 {
61 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
62 
63 	if (pkey_idx < ARRAY_SIZE(ppd->pkeys))
64 		return ppd->pkeys[pkey_idx];
65 
66 	return 0;
67 }
68 
69 void hfi1_event_pkey_change(struct hfi1_devdata *dd, u32 port)
70 {
71 	struct ib_event event;
72 
73 	event.event = IB_EVENT_PKEY_CHANGE;
74 	event.device = &dd->verbs_dev.rdi.ibdev;
75 	event.element.port_num = port;
76 	ib_dispatch_event(&event);
77 }
78 
79 /*
80  * If the port is down, clean up all pending traps.  We need to be careful
81  * with the given trap, because it may be queued.
82  */
83 static void cleanup_traps(struct hfi1_ibport *ibp, struct trap_node *trap)
84 {
85 	struct trap_node *node, *q;
86 	unsigned long flags;
87 	struct list_head trap_list;
88 	int i;
89 
90 	for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) {
91 		spin_lock_irqsave(&ibp->rvp.lock, flags);
92 		list_replace_init(&ibp->rvp.trap_lists[i].list, &trap_list);
93 		ibp->rvp.trap_lists[i].list_len = 0;
94 		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
95 
96 		/*
97 		 * Remove all items from the list, freeing all the non-given
98 		 * traps.
99 		 */
100 		list_for_each_entry_safe(node, q, &trap_list, list) {
101 			list_del(&node->list);
102 			if (node != trap)
103 				kfree(node);
104 		}
105 	}
106 
107 	/*
108 	 * If this wasn't on one of the lists it would not be freed.  If it
109 	 * was on the list, it is now safe to free.
110 	 */
111 	kfree(trap);
112 }
113 
114 static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp,
115 					    struct trap_node *trap)
116 {
117 	struct trap_node *node;
118 	struct trap_list *trap_list;
119 	unsigned long flags;
120 	unsigned long timeout;
121 	int found = 0;
122 	unsigned int queue_id;
123 	static int trap_count;
124 
125 	queue_id = trap->data.generic_type & 0x0F;
126 	if (queue_id >= RVT_MAX_TRAP_LISTS) {
127 		trap_count++;
128 		pr_err_ratelimited("hfi1: Invalid trap 0x%0x dropped. Total dropped: %d\n",
129 				   trap->data.generic_type, trap_count);
130 		kfree(trap);
131 		return NULL;
132 	}
133 
134 	/*
135 	 * Since the retry (handle timeout) does not remove a trap request
136 	 * from the list, all we have to do is compare the node.
137 	 */
138 	spin_lock_irqsave(&ibp->rvp.lock, flags);
139 	trap_list = &ibp->rvp.trap_lists[queue_id];
140 
141 	list_for_each_entry(node, &trap_list->list, list) {
142 		if (node == trap) {
143 			node->retry++;
144 			found = 1;
145 			break;
146 		}
147 	}
148 
149 	/* If it is not on the list, add it, limited to RVT-MAX_TRAP_LEN. */
150 	if (!found) {
151 		if (trap_list->list_len < RVT_MAX_TRAP_LEN) {
152 			trap_list->list_len++;
153 			list_add_tail(&trap->list, &trap_list->list);
154 		} else {
155 			pr_warn_ratelimited("hfi1: Maximum trap limit reached for 0x%0x traps\n",
156 					    trap->data.generic_type);
157 			kfree(trap);
158 		}
159 	}
160 
161 	/*
162 	 * Next check to see if there is a timer pending.  If not, set it up
163 	 * and get the first trap from the list.
164 	 */
165 	node = NULL;
166 	if (!timer_pending(&ibp->rvp.trap_timer)) {
167 		/*
168 		 * o14-2
169 		 * If the time out is set we have to wait until it expires
170 		 * before the trap can be sent.
171 		 * This should be > RVT_TRAP_TIMEOUT
172 		 */
173 		timeout = (RVT_TRAP_TIMEOUT *
174 			   (1UL << ibp->rvp.subnet_timeout)) / 1000;
175 		mod_timer(&ibp->rvp.trap_timer,
176 			  jiffies + usecs_to_jiffies(timeout));
177 		node = list_first_entry(&trap_list->list, struct trap_node,
178 					list);
179 		node->in_use = 1;
180 	}
181 	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
182 
183 	return node;
184 }
185 
186 static void subn_handle_opa_trap_repress(struct hfi1_ibport *ibp,
187 					 struct opa_smp *smp)
188 {
189 	struct trap_list *trap_list;
190 	struct trap_node *trap;
191 	unsigned long flags;
192 	int i;
193 
194 	if (smp->attr_id != IB_SMP_ATTR_NOTICE)
195 		return;
196 
197 	spin_lock_irqsave(&ibp->rvp.lock, flags);
198 	for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) {
199 		trap_list = &ibp->rvp.trap_lists[i];
200 		trap = list_first_entry_or_null(&trap_list->list,
201 						struct trap_node, list);
202 		if (trap && trap->tid == smp->tid) {
203 			if (trap->in_use) {
204 				trap->repress = 1;
205 			} else {
206 				trap_list->list_len--;
207 				list_del(&trap->list);
208 				kfree(trap);
209 			}
210 			break;
211 		}
212 	}
213 	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
214 }
215 
216 static void hfi1_update_sm_ah_attr(struct hfi1_ibport *ibp,
217 				   struct rdma_ah_attr *attr, u32 dlid)
218 {
219 	rdma_ah_set_dlid(attr, dlid);
220 	rdma_ah_set_port_num(attr, ppd_from_ibp(ibp)->port);
221 	if (dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
222 		struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
223 
224 		rdma_ah_set_ah_flags(attr, IB_AH_GRH);
225 		grh->sgid_index = 0;
226 		grh->hop_limit = 1;
227 		grh->dgid.global.subnet_prefix =
228 			ibp->rvp.gid_prefix;
229 		grh->dgid.global.interface_id = OPA_MAKE_ID(dlid);
230 	}
231 }
232 
233 static int hfi1_modify_qp0_ah(struct hfi1_ibport *ibp,
234 			      struct rvt_ah *ah, u32 dlid)
235 {
236 	struct rdma_ah_attr attr;
237 	struct rvt_qp *qp0;
238 	int ret = -EINVAL;
239 
240 	memset(&attr, 0, sizeof(attr));
241 	attr.type = ah->ibah.type;
242 	hfi1_update_sm_ah_attr(ibp, &attr, dlid);
243 	rcu_read_lock();
244 	qp0 = rcu_dereference(ibp->rvp.qp[0]);
245 	if (qp0)
246 		ret = rdma_modify_ah(&ah->ibah, &attr);
247 	rcu_read_unlock();
248 	return ret;
249 }
250 
251 static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid)
252 {
253 	struct rdma_ah_attr attr;
254 	struct ib_ah *ah = ERR_PTR(-EINVAL);
255 	struct rvt_qp *qp0;
256 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
257 	struct hfi1_devdata *dd = dd_from_ppd(ppd);
258 	u32 port_num = ppd->port;
259 
260 	memset(&attr, 0, sizeof(attr));
261 	attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
262 	hfi1_update_sm_ah_attr(ibp, &attr, dlid);
263 	rcu_read_lock();
264 	qp0 = rcu_dereference(ibp->rvp.qp[0]);
265 	if (qp0)
266 		ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0);
267 	rcu_read_unlock();
268 	return ah;
269 }
270 
271 static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap)
272 {
273 	struct ib_mad_send_buf *send_buf;
274 	struct ib_mad_agent *agent;
275 	struct opa_smp *smp;
276 	unsigned long flags;
277 	int pkey_idx;
278 	u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;
279 
280 	agent = ibp->rvp.send_agent;
281 	if (!agent) {
282 		cleanup_traps(ibp, trap);
283 		return;
284 	}
285 
286 	/* o14-3.2.1 */
287 	if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) {
288 		cleanup_traps(ibp, trap);
289 		return;
290 	}
291 
292 	/* Add the trap to the list if necessary and see if we can send it */
293 	trap = check_and_add_trap(ibp, trap);
294 	if (!trap)
295 		return;
296 
297 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
298 	if (pkey_idx < 0) {
299 		pr_warn("%s: failed to find limited mgmt pkey, defaulting 0x%x\n",
300 			__func__, hfi1_get_pkey(ibp, 1));
301 		pkey_idx = 1;
302 	}
303 
304 	send_buf = ib_create_send_mad(agent, qpn, pkey_idx, 0,
305 				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
306 				      GFP_ATOMIC, IB_MGMT_BASE_VERSION);
307 	if (IS_ERR(send_buf))
308 		return;
309 
310 	smp = send_buf->mad;
311 	smp->base_version = OPA_MGMT_BASE_VERSION;
312 	smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
313 	smp->class_version = OPA_SM_CLASS_VERSION;
314 	smp->method = IB_MGMT_METHOD_TRAP;
315 
316 	/* Only update the transaction ID for new traps (o13-5). */
317 	if (trap->tid == 0) {
318 		ibp->rvp.tid++;
319 		/* make sure that tid != 0 */
320 		if (ibp->rvp.tid == 0)
321 			ibp->rvp.tid++;
322 		trap->tid = cpu_to_be64(ibp->rvp.tid);
323 	}
324 	smp->tid = trap->tid;
325 
326 	smp->attr_id = IB_SMP_ATTR_NOTICE;
327 	/* o14-1: smp->mkey = 0; */
328 
329 	memcpy(smp->route.lid.data, &trap->data, trap->len);
330 
331 	spin_lock_irqsave(&ibp->rvp.lock, flags);
332 	if (!ibp->rvp.sm_ah) {
333 		if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
334 			struct ib_ah *ah;
335 
336 			ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
337 			if (IS_ERR(ah)) {
338 				spin_unlock_irqrestore(&ibp->rvp.lock, flags);
339 				return;
340 			}
341 			send_buf->ah = ah;
342 			ibp->rvp.sm_ah = ibah_to_rvtah(ah);
343 		} else {
344 			spin_unlock_irqrestore(&ibp->rvp.lock, flags);
345 			return;
346 		}
347 	} else {
348 		send_buf->ah = &ibp->rvp.sm_ah->ibah;
349 	}
350 
351 	/*
352 	 * If the trap was repressed while things were getting set up, don't
353 	 * bother sending it. This could happen for a retry.
354 	 */
355 	if (trap->repress) {
356 		list_del(&trap->list);
357 		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
358 		kfree(trap);
359 		ib_free_send_mad(send_buf);
360 		return;
361 	}
362 
363 	trap->in_use = 0;
364 	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
365 
366 	if (ib_post_send_mad(send_buf, NULL))
367 		ib_free_send_mad(send_buf);
368 }
369 
370 void hfi1_handle_trap_timer(struct timer_list *t)
371 {
372 	struct hfi1_ibport *ibp = from_timer(ibp, t, rvp.trap_timer);
373 	struct trap_node *trap = NULL;
374 	unsigned long flags;
375 	int i;
376 
377 	/* Find the trap with the highest priority */
378 	spin_lock_irqsave(&ibp->rvp.lock, flags);
379 	for (i = 0; !trap && i < RVT_MAX_TRAP_LISTS; i++) {
380 		trap = list_first_entry_or_null(&ibp->rvp.trap_lists[i].list,
381 						struct trap_node, list);
382 	}
383 	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
384 
385 	if (trap)
386 		send_trap(ibp, trap);
387 }
388 
389 static struct trap_node *create_trap_node(u8 type, __be16 trap_num, u32 lid)
390 {
391 	struct trap_node *trap;
392 
393 	trap = kzalloc(sizeof(*trap), GFP_ATOMIC);
394 	if (!trap)
395 		return NULL;
396 
397 	INIT_LIST_HEAD(&trap->list);
398 	trap->data.generic_type = type;
399 	trap->data.prod_type_lsb = IB_NOTICE_PROD_CA;
400 	trap->data.trap_num = trap_num;
401 	trap->data.issuer_lid = cpu_to_be32(lid);
402 
403 	return trap;
404 }
405 
406 /*
407  * Send a bad P_Key trap (ch. 14.3.8).
408  */
409 void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl,
410 		   u32 qp1, u32 qp2, u32 lid1, u32 lid2)
411 {
412 	struct trap_node *trap;
413 	u32 lid = ppd_from_ibp(ibp)->lid;
414 
415 	ibp->rvp.n_pkt_drops++;
416 	ibp->rvp.pkey_violations++;
417 
418 	trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_P_KEY,
419 				lid);
420 	if (!trap)
421 		return;
422 
423 	/* Send violation trap */
424 	trap->data.ntc_257_258.lid1 = cpu_to_be32(lid1);
425 	trap->data.ntc_257_258.lid2 = cpu_to_be32(lid2);
426 	trap->data.ntc_257_258.key = cpu_to_be32(key);
427 	trap->data.ntc_257_258.sl = sl << 3;
428 	trap->data.ntc_257_258.qp1 = cpu_to_be32(qp1);
429 	trap->data.ntc_257_258.qp2 = cpu_to_be32(qp2);
430 
431 	trap->len = sizeof(trap->data);
432 	send_trap(ibp, trap);
433 }
434 
435 /*
436  * Send a bad M_Key trap (ch. 14.3.9).
437  */
438 static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
439 		     __be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt)
440 {
441 	struct trap_node *trap;
442 	u32 lid = ppd_from_ibp(ibp)->lid;
443 
444 	trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_M_KEY,
445 				lid);
446 	if (!trap)
447 		return;
448 
449 	/* Send violation trap */
450 	trap->data.ntc_256.lid = trap->data.issuer_lid;
451 	trap->data.ntc_256.method = mad->method;
452 	trap->data.ntc_256.attr_id = mad->attr_id;
453 	trap->data.ntc_256.attr_mod = mad->attr_mod;
454 	trap->data.ntc_256.mkey = mkey;
455 	if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
456 		trap->data.ntc_256.dr_slid = dr_slid;
457 		trap->data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
458 		if (hop_cnt > ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path)) {
459 			trap->data.ntc_256.dr_trunc_hop |=
460 				IB_NOTICE_TRAP_DR_TRUNC;
461 			hop_cnt = ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path);
462 		}
463 		trap->data.ntc_256.dr_trunc_hop |= hop_cnt;
464 		memcpy(trap->data.ntc_256.dr_rtn_path, return_path,
465 		       hop_cnt);
466 	}
467 
468 	trap->len = sizeof(trap->data);
469 
470 	send_trap(ibp, trap);
471 }
472 
473 /*
474  * Send a Port Capability Mask Changed trap (ch. 14.3.11).
475  */
476 void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u32 port_num)
477 {
478 	struct trap_node *trap;
479 	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
480 	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
481 	struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
482 	u32 lid = ppd_from_ibp(ibp)->lid;
483 
484 	trap = create_trap_node(IB_NOTICE_TYPE_INFO,
485 				OPA_TRAP_CHANGE_CAPABILITY,
486 				lid);
487 	if (!trap)
488 		return;
489 
490 	trap->data.ntc_144.lid = trap->data.issuer_lid;
491 	trap->data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
492 	trap->data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags);
493 
494 	trap->len = sizeof(trap->data);
495 	send_trap(ibp, trap);
496 }
497 
498 /*
499  * Send a System Image GUID Changed trap (ch. 14.3.12).
500  */
501 void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
502 {
503 	struct trap_node *trap;
504 	u32 lid = ppd_from_ibp(ibp)->lid;
505 
506 	trap = create_trap_node(IB_NOTICE_TYPE_INFO, OPA_TRAP_CHANGE_SYSGUID,
507 				lid);
508 	if (!trap)
509 		return;
510 
511 	trap->data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
512 	trap->data.ntc_145.lid = trap->data.issuer_lid;
513 
514 	trap->len = sizeof(trap->data);
515 	send_trap(ibp, trap);
516 }
517 
518 /*
519  * Send a Node Description Changed trap (ch. 14.3.13).
520  */
521 void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
522 {
523 	struct trap_node *trap;
524 	u32 lid = ppd_from_ibp(ibp)->lid;
525 
526 	trap = create_trap_node(IB_NOTICE_TYPE_INFO,
527 				OPA_TRAP_CHANGE_CAPABILITY,
528 				lid);
529 	if (!trap)
530 		return;
531 
532 	trap->data.ntc_144.lid = trap->data.issuer_lid;
533 	trap->data.ntc_144.change_flags =
534 		cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG);
535 
536 	trap->len = sizeof(trap->data);
537 	send_trap(ibp, trap);
538 }
539 
540 static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
541 				   u8 *data, struct ib_device *ibdev,
542 				   u32 port, u32 *resp_len, u32 max_len)
543 {
544 	struct opa_node_description *nd;
545 
546 	if (am || smp_length_check(sizeof(*nd), max_len)) {
547 		smp->status |= IB_SMP_INVALID_FIELD;
548 		return reply((struct ib_mad_hdr *)smp);
549 	}
550 
551 	nd = (struct opa_node_description *)data;
552 
553 	memcpy(nd->data, ibdev->node_desc, sizeof(nd->data));
554 
555 	if (resp_len)
556 		*resp_len += sizeof(*nd);
557 
558 	return reply((struct ib_mad_hdr *)smp);
559 }
560 
561 static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
562 				   struct ib_device *ibdev, u32 port,
563 				   u32 *resp_len, u32 max_len)
564 {
565 	struct opa_node_info *ni;
566 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
567 	u32 pidx = port - 1; /* IB number port from 1, hw from 0 */
568 
569 	ni = (struct opa_node_info *)data;
570 
571 	/* GUID 0 is illegal */
572 	if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
573 	    smp_length_check(sizeof(*ni), max_len) ||
574 	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
575 		smp->status |= IB_SMP_INVALID_FIELD;
576 		return reply((struct ib_mad_hdr *)smp);
577 	}
578 
579 	ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
580 	ni->base_version = OPA_MGMT_BASE_VERSION;
581 	ni->class_version = OPA_SM_CLASS_VERSION;
582 	ni->node_type = 1;     /* channel adapter */
583 	ni->num_ports = ibdev->phys_port_cnt;
584 	/* This is already in network order */
585 	ni->system_image_guid = ib_hfi1_sys_image_guid;
586 	ni->node_guid = ibdev->node_guid;
587 	ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
588 	ni->device_id = cpu_to_be16(dd->pcidev->device);
589 	ni->revision = cpu_to_be32(dd->minrev);
590 	ni->local_port_num = port;
591 	ni->vendor_id[0] = dd->oui1;
592 	ni->vendor_id[1] = dd->oui2;
593 	ni->vendor_id[2] = dd->oui3;
594 
595 	if (resp_len)
596 		*resp_len += sizeof(*ni);
597 
598 	return reply((struct ib_mad_hdr *)smp);
599 }
600 
601 static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
602 			     u32 port)
603 {
604 	struct ib_node_info *nip = (struct ib_node_info *)&smp->data;
605 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
606 	u32 pidx = port - 1; /* IB number port from 1, hw from 0 */
607 
608 	/* GUID 0 is illegal */
609 	if (smp->attr_mod || pidx >= dd->num_pports ||
610 	    ibdev->node_guid == 0 ||
611 	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
612 		smp->status |= IB_SMP_INVALID_FIELD;
613 		return reply((struct ib_mad_hdr *)smp);
614 	}
615 
616 	nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
617 	nip->base_version = OPA_MGMT_BASE_VERSION;
618 	nip->class_version = OPA_SM_CLASS_VERSION;
619 	nip->node_type = 1;     /* channel adapter */
620 	nip->num_ports = ibdev->phys_port_cnt;
621 	/* This is already in network order */
622 	nip->sys_guid = ib_hfi1_sys_image_guid;
623 	nip->node_guid = ibdev->node_guid;
624 	nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
625 	nip->device_id = cpu_to_be16(dd->pcidev->device);
626 	nip->revision = cpu_to_be32(dd->minrev);
627 	nip->local_port_num = port;
628 	nip->vendor_id[0] = dd->oui1;
629 	nip->vendor_id[1] = dd->oui2;
630 	nip->vendor_id[2] = dd->oui3;
631 
632 	return reply((struct ib_mad_hdr *)smp);
633 }
634 
635 static void set_link_width_enabled(struct hfi1_pportdata *ppd, u32 w)
636 {
637 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_ENB, w);
638 }
639 
640 static void set_link_width_downgrade_enabled(struct hfi1_pportdata *ppd, u32 w)
641 {
642 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_DG_ENB, w);
643 }
644 
645 static void set_link_speed_enabled(struct hfi1_pportdata *ppd, u32 s)
646 {
647 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_SPD_ENB, s);
648 }
649 
650 static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
651 		      int mad_flags, __be64 mkey, __be32 dr_slid,
652 		      u8 return_path[], u8 hop_cnt)
653 {
654 	int valid_mkey = 0;
655 	int ret = 0;
656 
657 	/* Is the mkey in the process of expiring? */
658 	if (ibp->rvp.mkey_lease_timeout &&
659 	    time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
660 		/* Clear timeout and mkey protection field. */
661 		ibp->rvp.mkey_lease_timeout = 0;
662 		ibp->rvp.mkeyprot = 0;
663 	}
664 
665 	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
666 	    ibp->rvp.mkey == mkey)
667 		valid_mkey = 1;
668 
669 	/* Unset lease timeout on any valid Get/Set/TrapRepress */
670 	if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
671 	    (mad->method == IB_MGMT_METHOD_GET ||
672 	     mad->method == IB_MGMT_METHOD_SET ||
673 	     mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
674 		ibp->rvp.mkey_lease_timeout = 0;
675 
676 	if (!valid_mkey) {
677 		switch (mad->method) {
678 		case IB_MGMT_METHOD_GET:
679 			/* Bad mkey not a violation below level 2 */
680 			if (ibp->rvp.mkeyprot < 2)
681 				break;
682 			fallthrough;
683 		case IB_MGMT_METHOD_SET:
684 		case IB_MGMT_METHOD_TRAP_REPRESS:
685 			if (ibp->rvp.mkey_violations != 0xFFFF)
686 				++ibp->rvp.mkey_violations;
687 			if (!ibp->rvp.mkey_lease_timeout &&
688 			    ibp->rvp.mkey_lease_period)
689 				ibp->rvp.mkey_lease_timeout = jiffies +
690 					ibp->rvp.mkey_lease_period * HZ;
691 			/* Generate a trap notice. */
692 			bad_mkey(ibp, mad, mkey, dr_slid, return_path,
693 				 hop_cnt);
694 			ret = 1;
695 		}
696 	}
697 
698 	return ret;
699 }
700 
701 /*
702  * The SMA caches reads from LCB registers in case the LCB is unavailable.
703  * (The LCB is unavailable in certain link states, for example.)
704  */
705 struct lcb_datum {
706 	u32 off;
707 	u64 val;
708 };
709 
710 static struct lcb_datum lcb_cache[] = {
711 	{ DC_LCB_STS_ROUND_TRIP_LTP_CNT, 0 },
712 };
713 
714 static int write_lcb_cache(u32 off, u64 val)
715 {
716 	int i;
717 
718 	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
719 		if (lcb_cache[i].off == off) {
720 			lcb_cache[i].val = val;
721 			return 0;
722 		}
723 	}
724 
725 	pr_warn("%s bad offset 0x%x\n", __func__, off);
726 	return -1;
727 }
728 
729 static int read_lcb_cache(u32 off, u64 *val)
730 {
731 	int i;
732 
733 	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
734 		if (lcb_cache[i].off == off) {
735 			*val = lcb_cache[i].val;
736 			return 0;
737 		}
738 	}
739 
740 	pr_warn("%s bad offset 0x%x\n", __func__, off);
741 	return -1;
742 }
743 
744 void read_ltp_rtt(struct hfi1_devdata *dd)
745 {
746 	u64 reg;
747 
748 	if (read_lcb_csr(dd, DC_LCB_STS_ROUND_TRIP_LTP_CNT, &reg))
749 		dd_dev_err(dd, "%s: unable to read LTP RTT\n", __func__);
750 	else
751 		write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
752 }
753 
754 static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
755 				   struct ib_device *ibdev, u32 port,
756 				   u32 *resp_len, u32 max_len)
757 {
758 	int i;
759 	struct hfi1_devdata *dd;
760 	struct hfi1_pportdata *ppd;
761 	struct hfi1_ibport *ibp;
762 	struct opa_port_info *pi = (struct opa_port_info *)data;
763 	u8 mtu;
764 	u8 credit_rate;
765 	u8 is_beaconing_active;
766 	u32 state;
767 	u32 num_ports = OPA_AM_NPORT(am);
768 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
769 	u32 buffer_units;
770 	u64 tmp = 0;
771 
772 	if (num_ports != 1 || smp_length_check(sizeof(*pi), max_len)) {
773 		smp->status |= IB_SMP_INVALID_FIELD;
774 		return reply((struct ib_mad_hdr *)smp);
775 	}
776 
777 	dd = dd_from_ibdev(ibdev);
778 	/* IB numbers ports from 1, hw from 0 */
779 	ppd = dd->pport + (port - 1);
780 	ibp = &ppd->ibport_data;
781 
782 	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
783 	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
784 		smp->status |= IB_SMP_INVALID_FIELD;
785 		return reply((struct ib_mad_hdr *)smp);
786 	}
787 
788 	pi->lid = cpu_to_be32(ppd->lid);
789 
790 	/* Only return the mkey if the protection field allows it. */
791 	if (!(smp->method == IB_MGMT_METHOD_GET &&
792 	      ibp->rvp.mkey != smp->mkey &&
793 	      ibp->rvp.mkeyprot == 1))
794 		pi->mkey = ibp->rvp.mkey;
795 
796 	pi->subnet_prefix = ibp->rvp.gid_prefix;
797 	pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
798 	pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
799 	pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
800 	pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
801 	pi->sa_qp = cpu_to_be32(ppd->sa_qp);
802 
803 	pi->link_width.enabled = cpu_to_be16(ppd->link_width_enabled);
804 	pi->link_width.supported = cpu_to_be16(ppd->link_width_supported);
805 	pi->link_width.active = cpu_to_be16(ppd->link_width_active);
806 
807 	pi->link_width_downgrade.supported =
808 			cpu_to_be16(ppd->link_width_downgrade_supported);
809 	pi->link_width_downgrade.enabled =
810 			cpu_to_be16(ppd->link_width_downgrade_enabled);
811 	pi->link_width_downgrade.tx_active =
812 			cpu_to_be16(ppd->link_width_downgrade_tx_active);
813 	pi->link_width_downgrade.rx_active =
814 			cpu_to_be16(ppd->link_width_downgrade_rx_active);
815 
816 	pi->link_speed.supported = cpu_to_be16(ppd->link_speed_supported);
817 	pi->link_speed.active = cpu_to_be16(ppd->link_speed_active);
818 	pi->link_speed.enabled = cpu_to_be16(ppd->link_speed_enabled);
819 
820 	state = driver_lstate(ppd);
821 
822 	if (start_of_sm_config && (state == IB_PORT_INIT))
823 		ppd->is_sm_config_started = 1;
824 
825 	pi->port_phys_conf = (ppd->port_type & 0xf);
826 
827 	pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
828 	pi->port_states.ledenable_offlinereason |=
829 		ppd->is_sm_config_started << 5;
830 	/*
831 	 * This pairs with the memory barrier in hfi1_start_led_override to
832 	 * ensure that we read the correct state of LED beaconing represented
833 	 * by led_override_timer_active
834 	 */
835 	smp_rmb();
836 	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
837 	pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
838 	pi->port_states.ledenable_offlinereason |=
839 		ppd->offline_disabled_reason;
840 
841 	pi->port_states.portphysstate_portstate =
842 		(driver_pstate(ppd) << 4) | state;
843 
844 	pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
845 
846 	memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
847 	for (i = 0; i < ppd->vls_supported; i++) {
848 		mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
849 		if ((i % 2) == 0)
850 			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
851 		else
852 			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
853 	}
854 	/* don't forget VL 15 */
855 	mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
856 	pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
857 	pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
858 	pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
859 	pi->partenforce_filterraw |=
860 		(ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
861 	if (ppd->part_enforce & HFI1_PART_ENFORCE_IN)
862 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
863 	if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
864 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
865 	pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
866 	/* P_KeyViolations are counted by hardware. */
867 	pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
868 	pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
869 
870 	pi->vl.cap = ppd->vls_supported;
871 	pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
872 	pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
873 	pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);
874 
875 	pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;
876 
877 	pi->port_link_mode  = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
878 					  OPA_PORT_LINK_MODE_OPA << 5 |
879 					  OPA_PORT_LINK_MODE_OPA);
880 
881 	pi->port_ltp_crc_mode = cpu_to_be16(ppd->port_ltp_crc_mode);
882 
883 	pi->port_mode = cpu_to_be16(
884 				ppd->is_active_optimize_enabled ?
885 					OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);
886 
887 	pi->port_packet_format.supported =
888 		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
889 			    OPA_PORT_PACKET_FORMAT_16B);
890 	pi->port_packet_format.enabled =
891 		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
892 			    OPA_PORT_PACKET_FORMAT_16B);
893 
894 	/* flit_control.interleave is (OPA V1, version .76):
895 	 * bits		use
896 	 * ----		---
897 	 * 2		res
898 	 * 2		DistanceSupported
899 	 * 2		DistanceEnabled
900 	 * 5		MaxNextLevelTxEnabled
901 	 * 5		MaxNestLevelRxSupported
902 	 *
903 	 * HFI supports only "distance mode 1" (see OPA V1, version .76,
904 	 * section 9.6.2), so set DistanceSupported, DistanceEnabled
905 	 * to 0x1.
906 	 */
907 	pi->flit_control.interleave = cpu_to_be16(0x1400);
908 
909 	pi->link_down_reason = ppd->local_link_down_reason.sma;
910 	pi->neigh_link_down_reason = ppd->neigh_link_down_reason.sma;
911 	pi->port_error_action = cpu_to_be32(ppd->port_error_action);
912 	pi->mtucap = mtu_to_enum(hfi1_max_mtu, IB_MTU_4096);
913 
914 	/* 32.768 usec. response time (guessing) */
915 	pi->resptimevalue = 3;
916 
917 	pi->local_port_num = port;
918 
919 	/* buffer info for FM */
920 	pi->overall_buffer_space = cpu_to_be16(dd->link_credits);
921 
922 	pi->neigh_node_guid = cpu_to_be64(ppd->neighbor_guid);
923 	pi->neigh_port_num = ppd->neighbor_port_number;
924 	pi->port_neigh_mode =
925 		(ppd->neighbor_type & OPA_PI_MASK_NEIGH_NODE_TYPE) |
926 		(ppd->mgmt_allowed ? OPA_PI_MASK_NEIGH_MGMT_ALLOWED : 0) |
927 		(ppd->neighbor_fm_security ?
928 			OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS : 0);
929 
930 	/* HFIs shall always return VL15 credits to their
931 	 * neighbor in a timely manner, without any credit return pacing.
932 	 */
933 	credit_rate = 0;
934 	buffer_units  = (dd->vau) & OPA_PI_MASK_BUF_UNIT_BUF_ALLOC;
935 	buffer_units |= (dd->vcu << 3) & OPA_PI_MASK_BUF_UNIT_CREDIT_ACK;
936 	buffer_units |= (credit_rate << 6) &
937 				OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE;
938 	buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
939 	pi->buffer_units = cpu_to_be32(buffer_units);
940 
941 	pi->opa_cap_mask = cpu_to_be16(ibp->rvp.port_cap3_flags);
942 	pi->collectivemask_multicastmask = ((OPA_COLLECTIVE_NR & 0x7)
943 					    << 3 | (OPA_MCAST_NR & 0x7));
944 
945 	/* HFI supports a replay buffer 128 LTPs in size */
946 	pi->replay_depth.buffer = 0x80;
947 	/* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
948 	read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);
949 
950 	/*
951 	 * this counter is 16 bits wide, but the replay_depth.wire
952 	 * variable is only 8 bits
953 	 */
954 	if (tmp > 0xff)
955 		tmp = 0xff;
956 	pi->replay_depth.wire = tmp;
957 
958 	if (resp_len)
959 		*resp_len += sizeof(struct opa_port_info);
960 
961 	return reply((struct ib_mad_hdr *)smp);
962 }
963 
964 /**
965  * get_pkeys - return the PKEY table
966  * @dd: the hfi1_ib device
967  * @port: the IB port number
968  * @pkeys: the pkey table is placed here
969  */
970 static int get_pkeys(struct hfi1_devdata *dd, u32 port, u16 *pkeys)
971 {
972 	struct hfi1_pportdata *ppd = dd->pport + port - 1;
973 
974 	memcpy(pkeys, ppd->pkeys, sizeof(ppd->pkeys));
975 
976 	return 0;
977 }
978 
979 static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
980 				    struct ib_device *ibdev, u32 port,
981 				    u32 *resp_len, u32 max_len)
982 {
983 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
984 	u32 n_blocks_req = OPA_AM_NBLK(am);
985 	u32 start_block = am & 0x7ff;
986 	__be16 *p;
987 	u16 *q;
988 	int i;
989 	u16 n_blocks_avail;
990 	unsigned npkeys = hfi1_get_npkeys(dd);
991 	size_t size;
992 
993 	if (n_blocks_req == 0) {
994 		pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
995 			port, start_block, n_blocks_req);
996 		smp->status |= IB_SMP_INVALID_FIELD;
997 		return reply((struct ib_mad_hdr *)smp);
998 	}
999 
1000 	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
1001 
1002 	size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
1003 
1004 	if (smp_length_check(size, max_len)) {
1005 		smp->status |= IB_SMP_INVALID_FIELD;
1006 		return reply((struct ib_mad_hdr *)smp);
1007 	}
1008 
1009 	if (start_block + n_blocks_req > n_blocks_avail ||
1010 	    n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
1011 		pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; "
1012 			"avail 0x%x; blk/smp 0x%lx\n",
1013 			start_block, n_blocks_req, n_blocks_avail,
1014 			OPA_NUM_PKEY_BLOCKS_PER_SMP);
1015 		smp->status |= IB_SMP_INVALID_FIELD;
1016 		return reply((struct ib_mad_hdr *)smp);
1017 	}
1018 
1019 	p = (__be16 *)data;
1020 	q = (u16 *)data;
1021 	/* get the real pkeys if we are requesting the first block */
1022 	if (start_block == 0) {
1023 		get_pkeys(dd, port, q);
1024 		for (i = 0; i < npkeys; i++)
1025 			p[i] = cpu_to_be16(q[i]);
1026 		if (resp_len)
1027 			*resp_len += size;
1028 	} else {
1029 		smp->status |= IB_SMP_INVALID_FIELD;
1030 	}
1031 	return reply((struct ib_mad_hdr *)smp);
1032 }
1033 
/* Verdicts produced when a requested port state change is validated. */
enum {
	HFI_TRANSITION_DISALLOWED = 0,
	HFI_TRANSITION_IGNORED = 1,
	HFI_TRANSITION_ALLOWED = 2,
	HFI_TRANSITION_UNDEFINED = 3,
};
1040 
1041 /*
1042  * Use shortened names to improve readability of
1043  * {logical,physical}_state_transitions
1044  */
1045 enum {
1046 	__D = HFI_TRANSITION_DISALLOWED,
1047 	__I = HFI_TRANSITION_IGNORED,
1048 	__A = HFI_TRANSITION_ALLOWED,
1049 	__U = HFI_TRANSITION_UNDEFINED,
1050 };
1051 
1052 /*
1053  * IB_PORTPHYSSTATE_POLLING (2) through OPA_PORTPHYSSTATE_MAX (11) are
1054  * represented in physical_state_transitions.
1055  */
1056 #define __N_PHYSTATES (OPA_PORTPHYSSTATE_MAX - IB_PORTPHYSSTATE_POLLING + 1)
1057 
1058 /*
1059  * Within physical_state_transitions, rows represent "old" states,
1060  * columns "new" states, and physical_state_transitions.allowed[old][new]
1061  * indicates if the transition from old state to new state is legal (see
1062  * OPAg1v1, Table 6-4).
1063  */
1064 static const struct {
1065 	u8 allowed[__N_PHYSTATES][__N_PHYSTATES];
1066 } physical_state_transitions = {
1067 	{
1068 		/* 2    3    4    5    6    7    8    9   10   11 */
1069 	/* 2 */	{ __A, __A, __D, __D, __D, __D, __D, __D, __D, __D },
1070 	/* 3 */	{ __A, __I, __D, __D, __D, __D, __D, __D, __D, __A },
1071 	/* 4 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
1072 	/* 5 */	{ __A, __A, __D, __I, __D, __D, __D, __D, __D, __D },
1073 	/* 6 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
1074 	/* 7 */	{ __D, __A, __D, __D, __D, __I, __D, __D, __D, __D },
1075 	/* 8 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
1076 	/* 9 */	{ __I, __A, __D, __D, __D, __D, __D, __I, __D, __D },
1077 	/*10 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
1078 	/*11 */	{ __D, __A, __D, __D, __D, __D, __D, __D, __D, __I },
1079 	}
1080 };
1081 
1082 /*
1083  * IB_PORT_DOWN (1) through IB_PORT_ACTIVE_DEFER (5) are represented
1084  * logical_state_transitions
1085  */
1086 
1087 #define __N_LOGICAL_STATES (IB_PORT_ACTIVE_DEFER - IB_PORT_DOWN + 1)
1088 
1089 /*
1090  * Within logical_state_transitions rows represent "old" states,
1091  * columns "new" states, and logical_state_transitions.allowed[old][new]
1092  * indicates if the transition from old state to new state is legal (see
1093  * OPAg1v1, Table 9-12).
1094  */
1095 static const struct {
1096 	u8 allowed[__N_LOGICAL_STATES][__N_LOGICAL_STATES];
1097 } logical_state_transitions = {
1098 	{
1099 		/* 1    2    3    4    5 */
1100 	/* 1 */	{ __I, __D, __D, __D, __U},
1101 	/* 2 */	{ __D, __I, __A, __D, __U},
1102 	/* 3 */	{ __D, __D, __I, __A, __U},
1103 	/* 4 */	{ __D, __D, __I, __I, __U},
1104 	/* 5 */	{ __U, __U, __U, __U, __U},
1105 	}
1106 };
1107 
1108 static int logical_transition_allowed(int old, int new)
1109 {
1110 	if (old < IB_PORT_NOP || old > IB_PORT_ACTIVE_DEFER ||
1111 	    new < IB_PORT_NOP || new > IB_PORT_ACTIVE_DEFER) {
1112 		pr_warn("invalid logical state(s) (old %d new %d)\n",
1113 			old, new);
1114 		return HFI_TRANSITION_UNDEFINED;
1115 	}
1116 
1117 	if (new == IB_PORT_NOP)
1118 		return HFI_TRANSITION_ALLOWED; /* always allowed */
1119 
1120 	/* adjust states for indexing into logical_state_transitions */
1121 	old -= IB_PORT_DOWN;
1122 	new -= IB_PORT_DOWN;
1123 
1124 	if (old < 0 || new < 0)
1125 		return HFI_TRANSITION_UNDEFINED;
1126 	return logical_state_transitions.allowed[old][new];
1127 }
1128 
1129 static int physical_transition_allowed(int old, int new)
1130 {
1131 	if (old < IB_PORTPHYSSTATE_NOP || old > OPA_PORTPHYSSTATE_MAX ||
1132 	    new < IB_PORTPHYSSTATE_NOP || new > OPA_PORTPHYSSTATE_MAX) {
1133 		pr_warn("invalid physical state(s) (old %d new %d)\n",
1134 			old, new);
1135 		return HFI_TRANSITION_UNDEFINED;
1136 	}
1137 
1138 	if (new == IB_PORTPHYSSTATE_NOP)
1139 		return HFI_TRANSITION_ALLOWED; /* always allowed */
1140 
1141 	/* adjust states for indexing into physical_state_transitions */
1142 	old -= IB_PORTPHYSSTATE_POLLING;
1143 	new -= IB_PORTPHYSSTATE_POLLING;
1144 
1145 	if (old < 0 || new < 0)
1146 		return HFI_TRANSITION_UNDEFINED;
1147 	return physical_state_transitions.allowed[old][new];
1148 }
1149 
1150 static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
1151 					  u32 logical_new, u32 physical_new)
1152 {
1153 	u32 physical_old = driver_pstate(ppd);
1154 	u32 logical_old = driver_lstate(ppd);
1155 	int ret, logical_allowed, physical_allowed;
1156 
1157 	ret = logical_transition_allowed(logical_old, logical_new);
1158 	logical_allowed = ret;
1159 
1160 	if (ret == HFI_TRANSITION_DISALLOWED ||
1161 	    ret == HFI_TRANSITION_UNDEFINED) {
1162 		pr_warn("invalid logical state transition %s -> %s\n",
1163 			opa_lstate_name(logical_old),
1164 			opa_lstate_name(logical_new));
1165 		return ret;
1166 	}
1167 
1168 	ret = physical_transition_allowed(physical_old, physical_new);
1169 	physical_allowed = ret;
1170 
1171 	if (ret == HFI_TRANSITION_DISALLOWED ||
1172 	    ret == HFI_TRANSITION_UNDEFINED) {
1173 		pr_warn("invalid physical state transition %s -> %s\n",
1174 			opa_pstate_name(physical_old),
1175 			opa_pstate_name(physical_new));
1176 		return ret;
1177 	}
1178 
1179 	if (logical_allowed == HFI_TRANSITION_IGNORED &&
1180 	    physical_allowed == HFI_TRANSITION_IGNORED)
1181 		return HFI_TRANSITION_IGNORED;
1182 
1183 	/*
1184 	 * A change request of Physical Port State from
1185 	 * 'Offline' to 'Polling' should be ignored.
1186 	 */
1187 	if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
1188 	    (physical_new == IB_PORTPHYSSTATE_POLLING))
1189 		return HFI_TRANSITION_IGNORED;
1190 
1191 	/*
1192 	 * Either physical_allowed or logical_allowed is
1193 	 * HFI_TRANSITION_ALLOWED.
1194 	 */
1195 	return HFI_TRANSITION_ALLOWED;
1196 }
1197 
1198 static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
1199 			   u32 logical_state, u32 phys_state, int local_mad)
1200 {
1201 	struct hfi1_devdata *dd = ppd->dd;
1202 	u32 link_state;
1203 	int ret;
1204 
1205 	ret = port_states_transition_allowed(ppd, logical_state, phys_state);
1206 	if (ret == HFI_TRANSITION_DISALLOWED ||
1207 	    ret == HFI_TRANSITION_UNDEFINED) {
1208 		/* error message emitted above */
1209 		smp->status |= IB_SMP_INVALID_FIELD;
1210 		return 0;
1211 	}
1212 
1213 	if (ret == HFI_TRANSITION_IGNORED)
1214 		return 0;
1215 
1216 	if ((phys_state != IB_PORTPHYSSTATE_NOP) &&
1217 	    !(logical_state == IB_PORT_DOWN ||
1218 	      logical_state == IB_PORT_NOP)){
1219 		pr_warn("SubnSet(OPA_PortInfo) port state invalid: logical_state 0x%x physical_state 0x%x\n",
1220 			logical_state, phys_state);
1221 		smp->status |= IB_SMP_INVALID_FIELD;
1222 	}
1223 
1224 	/*
1225 	 * Logical state changes are summarized in OPAv1g1 spec.,
1226 	 * Table 9-12; physical state changes are summarized in
1227 	 * OPAv1g1 spec., Table 6.4.
1228 	 */
1229 	switch (logical_state) {
1230 	case IB_PORT_NOP:
1231 		if (phys_state == IB_PORTPHYSSTATE_NOP)
1232 			break;
1233 		fallthrough;
1234 	case IB_PORT_DOWN:
1235 		if (phys_state == IB_PORTPHYSSTATE_NOP) {
1236 			link_state = HLS_DN_DOWNDEF;
1237 		} else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
1238 			link_state = HLS_DN_POLL;
1239 			set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
1240 					     0, OPA_LINKDOWN_REASON_FM_BOUNCE);
1241 		} else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
1242 			link_state = HLS_DN_DISABLE;
1243 		} else {
1244 			pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
1245 				phys_state);
1246 			smp->status |= IB_SMP_INVALID_FIELD;
1247 			break;
1248 		}
1249 
1250 		if ((link_state == HLS_DN_POLL ||
1251 		     link_state == HLS_DN_DOWNDEF)) {
1252 			/*
1253 			 * Going to poll.  No matter what the current state,
1254 			 * always move offline first, then tune and start the
1255 			 * link.  This correctly handles a FM link bounce and
1256 			 * a link enable.  Going offline is a no-op if already
1257 			 * offline.
1258 			 */
1259 			set_link_state(ppd, HLS_DN_OFFLINE);
1260 			start_link(ppd);
1261 		} else {
1262 			set_link_state(ppd, link_state);
1263 		}
1264 		if (link_state == HLS_DN_DISABLE &&
1265 		    (ppd->offline_disabled_reason >
1266 		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
1267 		     ppd->offline_disabled_reason ==
1268 		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
1269 			ppd->offline_disabled_reason =
1270 			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
1271 		/*
1272 		 * Don't send a reply if the response would be sent
1273 		 * through the disabled port.
1274 		 */
1275 		if (link_state == HLS_DN_DISABLE && !local_mad)
1276 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
1277 		break;
1278 	case IB_PORT_ARMED:
1279 		ret = set_link_state(ppd, HLS_UP_ARMED);
1280 		if (!ret)
1281 			send_idle_sma(dd, SMA_IDLE_ARM);
1282 		break;
1283 	case IB_PORT_ACTIVE:
1284 		if (ppd->neighbor_normal) {
1285 			ret = set_link_state(ppd, HLS_UP_ACTIVE);
1286 			if (ret == 0)
1287 				send_idle_sma(dd, SMA_IDLE_ACTIVE);
1288 		} else {
1289 			pr_warn("SubnSet(OPA_PortInfo) Cannot move to Active with NeighborNormal 0\n");
1290 			smp->status |= IB_SMP_INVALID_FIELD;
1291 		}
1292 		break;
1293 	default:
1294 		pr_warn("SubnSet(OPA_PortInfo) invalid logical state 0x%x\n",
1295 			logical_state);
1296 		smp->status |= IB_SMP_INVALID_FIELD;
1297 	}
1298 
1299 	return 0;
1300 }
1301 
1302 /*
1303  * subn_set_opa_portinfo - set port information
1304  * @smp: the incoming SM packet
1305  * @ibdev: the infiniband device
1306  * @port: the port on the device
1307  *
1308  */
1309 static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
1310 				   struct ib_device *ibdev, u32 port,
1311 				   u32 *resp_len, u32 max_len, int local_mad)
1312 {
1313 	struct opa_port_info *pi = (struct opa_port_info *)data;
1314 	struct ib_event event;
1315 	struct hfi1_devdata *dd;
1316 	struct hfi1_pportdata *ppd;
1317 	struct hfi1_ibport *ibp;
1318 	u8 clientrereg;
1319 	unsigned long flags;
1320 	u32 smlid;
1321 	u32 lid;
1322 	u8 ls_old, ls_new, ps_new;
1323 	u8 vls;
1324 	u8 msl;
1325 	u8 crc_enabled;
1326 	u16 lse, lwe, mtu;
1327 	u32 num_ports = OPA_AM_NPORT(am);
1328 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1329 	int ret, i, invalid = 0, call_set_mtu = 0;
1330 	int call_link_downgrade_policy = 0;
1331 
1332 	if (num_ports != 1 ||
1333 	    smp_length_check(sizeof(*pi), max_len)) {
1334 		smp->status |= IB_SMP_INVALID_FIELD;
1335 		return reply((struct ib_mad_hdr *)smp);
1336 	}
1337 
1338 	lid = be32_to_cpu(pi->lid);
1339 	if (lid & 0xFF000000) {
1340 		pr_warn("OPA_PortInfo lid out of range: %X\n", lid);
1341 		smp->status |= IB_SMP_INVALID_FIELD;
1342 		goto get_only;
1343 	}
1344 
1345 
1346 	smlid = be32_to_cpu(pi->sm_lid);
1347 	if (smlid & 0xFF000000) {
1348 		pr_warn("OPA_PortInfo SM lid out of range: %X\n", smlid);
1349 		smp->status |= IB_SMP_INVALID_FIELD;
1350 		goto get_only;
1351 	}
1352 
1353 	clientrereg = (pi->clientrereg_subnettimeout &
1354 			OPA_PI_MASK_CLIENT_REREGISTER);
1355 
1356 	dd = dd_from_ibdev(ibdev);
1357 	/* IB numbers ports from 1, hw from 0 */
1358 	ppd = dd->pport + (port - 1);
1359 	ibp = &ppd->ibport_data;
1360 	event.device = ibdev;
1361 	event.element.port_num = port;
1362 
1363 	ls_old = driver_lstate(ppd);
1364 
1365 	ibp->rvp.mkey = pi->mkey;
1366 	if (ibp->rvp.gid_prefix != pi->subnet_prefix) {
1367 		ibp->rvp.gid_prefix = pi->subnet_prefix;
1368 		event.event = IB_EVENT_GID_CHANGE;
1369 		ib_dispatch_event(&event);
1370 	}
1371 	ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
1372 
1373 	/* Must be a valid unicast LID address. */
1374 	if ((lid == 0 && ls_old > IB_PORT_INIT) ||
1375 	     (hfi1_is_16B_mcast(lid))) {
1376 		smp->status |= IB_SMP_INVALID_FIELD;
1377 		pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
1378 			lid);
1379 	} else if (ppd->lid != lid ||
1380 		 ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC)) {
1381 		if (ppd->lid != lid)
1382 			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LID_CHANGE_BIT);
1383 		if (ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC))
1384 			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LMC_CHANGE_BIT);
1385 		hfi1_set_lid(ppd, lid, pi->mkeyprotect_lmc & OPA_PI_MASK_LMC);
1386 		event.event = IB_EVENT_LID_CHANGE;
1387 		ib_dispatch_event(&event);
1388 
1389 		if (HFI1_PORT_GUID_INDEX + 1 < HFI1_GUIDS_PER_PORT) {
1390 			/* Manufacture GID from LID to support extended
1391 			 * addresses
1392 			 */
1393 			ppd->guids[HFI1_PORT_GUID_INDEX + 1] =
1394 				be64_to_cpu(OPA_MAKE_ID(lid));
1395 			event.event = IB_EVENT_GID_CHANGE;
1396 			ib_dispatch_event(&event);
1397 		}
1398 	}
1399 
1400 	msl = pi->smsl & OPA_PI_MASK_SMSL;
1401 	if (pi->partenforce_filterraw & OPA_PI_MASK_LINKINIT_REASON)
1402 		ppd->linkinit_reason =
1403 			(pi->partenforce_filterraw &
1404 			 OPA_PI_MASK_LINKINIT_REASON);
1405 
1406 	/* Must be a valid unicast LID address. */
1407 	if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
1408 	     (hfi1_is_16B_mcast(smlid))) {
1409 		smp->status |= IB_SMP_INVALID_FIELD;
1410 		pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
1411 	} else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
1412 		pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
1413 		spin_lock_irqsave(&ibp->rvp.lock, flags);
1414 		if (ibp->rvp.sm_ah) {
1415 			if (smlid != ibp->rvp.sm_lid)
1416 				hfi1_modify_qp0_ah(ibp, ibp->rvp.sm_ah, smlid);
1417 			if (msl != ibp->rvp.sm_sl)
1418 				rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl);
1419 		}
1420 		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
1421 		if (smlid != ibp->rvp.sm_lid)
1422 			ibp->rvp.sm_lid = smlid;
1423 		if (msl != ibp->rvp.sm_sl)
1424 			ibp->rvp.sm_sl = msl;
1425 		event.event = IB_EVENT_SM_CHANGE;
1426 		ib_dispatch_event(&event);
1427 	}
1428 
1429 	if (pi->link_down_reason == 0) {
1430 		ppd->local_link_down_reason.sma = 0;
1431 		ppd->local_link_down_reason.latest = 0;
1432 	}
1433 
1434 	if (pi->neigh_link_down_reason == 0) {
1435 		ppd->neigh_link_down_reason.sma = 0;
1436 		ppd->neigh_link_down_reason.latest = 0;
1437 	}
1438 
1439 	ppd->sm_trap_qp = be32_to_cpu(pi->sm_trap_qp);
1440 	ppd->sa_qp = be32_to_cpu(pi->sa_qp);
1441 
1442 	ppd->port_error_action = be32_to_cpu(pi->port_error_action);
1443 	lwe = be16_to_cpu(pi->link_width.enabled);
1444 	if (lwe) {
1445 		if (lwe == OPA_LINK_WIDTH_RESET ||
1446 		    lwe == OPA_LINK_WIDTH_RESET_OLD)
1447 			set_link_width_enabled(ppd, ppd->link_width_supported);
1448 		else if ((lwe & ~ppd->link_width_supported) == 0)
1449 			set_link_width_enabled(ppd, lwe);
1450 		else
1451 			smp->status |= IB_SMP_INVALID_FIELD;
1452 	}
1453 	lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
1454 	/* LWD.E is always applied - 0 means "disabled" */
1455 	if (lwe == OPA_LINK_WIDTH_RESET ||
1456 	    lwe == OPA_LINK_WIDTH_RESET_OLD) {
1457 		set_link_width_downgrade_enabled(ppd,
1458 						 ppd->
1459 						 link_width_downgrade_supported
1460 						 );
1461 	} else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
1462 		/* only set and apply if something changed */
1463 		if (lwe != ppd->link_width_downgrade_enabled) {
1464 			set_link_width_downgrade_enabled(ppd, lwe);
1465 			call_link_downgrade_policy = 1;
1466 		}
1467 	} else {
1468 		smp->status |= IB_SMP_INVALID_FIELD;
1469 	}
1470 	lse = be16_to_cpu(pi->link_speed.enabled);
1471 	if (lse) {
1472 		if (lse & be16_to_cpu(pi->link_speed.supported))
1473 			set_link_speed_enabled(ppd, lse);
1474 		else
1475 			smp->status |= IB_SMP_INVALID_FIELD;
1476 	}
1477 
1478 	ibp->rvp.mkeyprot =
1479 		(pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
1480 	ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
1481 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
1482 				    ibp->rvp.vl_high_limit);
1483 
1484 	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
1485 	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
1486 		smp->status |= IB_SMP_INVALID_FIELD;
1487 		return reply((struct ib_mad_hdr *)smp);
1488 	}
1489 	for (i = 0; i < ppd->vls_supported; i++) {
1490 		if ((i % 2) == 0)
1491 			mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
1492 					   4) & 0xF);
1493 		else
1494 			mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
1495 					  0xF);
1496 		if (mtu == 0xffff) {
1497 			pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
1498 				mtu,
1499 				(pi->neigh_mtu.pvlx_to_mtu[0] >> 4) & 0xF);
1500 			smp->status |= IB_SMP_INVALID_FIELD;
1501 			mtu = hfi1_max_mtu; /* use a valid MTU */
1502 		}
1503 		if (dd->vld[i].mtu != mtu) {
1504 			dd_dev_info(dd,
1505 				    "MTU change on vl %d from %d to %d\n",
1506 				    i, dd->vld[i].mtu, mtu);
1507 			dd->vld[i].mtu = mtu;
1508 			call_set_mtu++;
1509 		}
1510 	}
1511 	/* As per OPAV1 spec: VL15 must support and be configured
1512 	 * for operation with a 2048 or larger MTU.
1513 	 */
1514 	mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
1515 	if (mtu < 2048 || mtu == 0xffff)
1516 		mtu = 2048;
1517 	if (dd->vld[15].mtu != mtu) {
1518 		dd_dev_info(dd,
1519 			    "MTU change on vl 15 from %d to %d\n",
1520 			    dd->vld[15].mtu, mtu);
1521 		dd->vld[15].mtu = mtu;
1522 		call_set_mtu++;
1523 	}
1524 	if (call_set_mtu)
1525 		set_mtu(ppd);
1526 
1527 	/* Set operational VLs */
1528 	vls = pi->operational_vls & OPA_PI_MASK_OPERATIONAL_VL;
1529 	if (vls) {
1530 		if (vls > ppd->vls_supported) {
1531 			pr_warn("SubnSet(OPA_PortInfo) VL's supported invalid %d\n",
1532 				pi->operational_vls);
1533 			smp->status |= IB_SMP_INVALID_FIELD;
1534 		} else {
1535 			if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
1536 					    vls) == -EINVAL)
1537 				smp->status |= IB_SMP_INVALID_FIELD;
1538 		}
1539 	}
1540 
1541 	if (pi->mkey_violations == 0)
1542 		ibp->rvp.mkey_violations = 0;
1543 
1544 	if (pi->pkey_violations == 0)
1545 		ibp->rvp.pkey_violations = 0;
1546 
1547 	if (pi->qkey_violations == 0)
1548 		ibp->rvp.qkey_violations = 0;
1549 
1550 	ibp->rvp.subnet_timeout =
1551 		pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
1552 
1553 	crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
1554 	crc_enabled >>= 4;
1555 	crc_enabled &= 0xf;
1556 
1557 	if (crc_enabled != 0)
1558 		ppd->port_crc_mode_enabled = port_ltp_to_cap(crc_enabled);
1559 
1560 	ppd->is_active_optimize_enabled =
1561 			!!(be16_to_cpu(pi->port_mode)
1562 					& OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE);
1563 
1564 	ls_new = pi->port_states.portphysstate_portstate &
1565 			OPA_PI_MASK_PORT_STATE;
1566 	ps_new = (pi->port_states.portphysstate_portstate &
1567 			OPA_PI_MASK_PORT_PHYSICAL_STATE) >> 4;
1568 
1569 	if (ls_old == IB_PORT_INIT) {
1570 		if (start_of_sm_config) {
1571 			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1572 				ppd->is_sm_config_started = 1;
1573 		} else if (ls_new == IB_PORT_ARMED) {
1574 			if (ppd->is_sm_config_started == 0) {
1575 				invalid = 1;
1576 				smp->status |= IB_SMP_INVALID_FIELD;
1577 			}
1578 		}
1579 	}
1580 
1581 	/* Handle CLIENT_REREGISTER event b/c SM asked us for it */
1582 	if (clientrereg) {
1583 		event.event = IB_EVENT_CLIENT_REREGISTER;
1584 		ib_dispatch_event(&event);
1585 	}
1586 
1587 	/*
1588 	 * Do the port state change now that the other link parameters
1589 	 * have been set.
1590 	 * Changing the port physical state only makes sense if the link
1591 	 * is down or is being set to down.
1592 	 */
1593 
1594 	if (!invalid) {
1595 		ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad);
1596 		if (ret)
1597 			return ret;
1598 	}
1599 
1600 	ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
1601 				      max_len);
1602 
1603 	/* restore re-reg bit per o14-12.2.1 */
1604 	pi->clientrereg_subnettimeout |= clientrereg;
1605 
1606 	/*
1607 	 * Apply the new link downgrade policy.  This may result in a link
1608 	 * bounce.  Do this after everything else so things are settled.
1609 	 * Possible problem: if setting the port state above fails, then
1610 	 * the policy change is not applied.
1611 	 */
1612 	if (call_link_downgrade_policy)
1613 		apply_link_downgrade_policy(ppd, 0);
1614 
1615 	return ret;
1616 
1617 get_only:
1618 	return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
1619 				       max_len);
1620 }
1621 
1622 /**
1623  * set_pkeys - set the PKEY table for ctxt 0
1624  * @dd: the hfi1_ib device
1625  * @port: the IB port number
1626  * @pkeys: the PKEY table
1627  */
1628 static int set_pkeys(struct hfi1_devdata *dd, u32 port, u16 *pkeys)
1629 {
1630 	struct hfi1_pportdata *ppd;
1631 	int i;
1632 	int changed = 0;
1633 	int update_includes_mgmt_partition = 0;
1634 
1635 	/*
1636 	 * IB port one/two always maps to context zero/one,
1637 	 * always a kernel context, no locking needed
1638 	 * If we get here with ppd setup, no need to check
1639 	 * that rcd is valid.
1640 	 */
1641 	ppd = dd->pport + (port - 1);
1642 	/*
1643 	 * If the update does not include the management pkey, don't do it.
1644 	 */
1645 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1646 		if (pkeys[i] == LIM_MGMT_P_KEY) {
1647 			update_includes_mgmt_partition = 1;
1648 			break;
1649 		}
1650 	}
1651 
1652 	if (!update_includes_mgmt_partition)
1653 		return 1;
1654 
1655 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1656 		u16 key = pkeys[i];
1657 		u16 okey = ppd->pkeys[i];
1658 
1659 		if (key == okey)
1660 			continue;
1661 		/*
1662 		 * The SM gives us the complete PKey table. We have
1663 		 * to ensure that we put the PKeys in the matching
1664 		 * slots.
1665 		 */
1666 		ppd->pkeys[i] = key;
1667 		changed = 1;
1668 	}
1669 
1670 	if (changed) {
1671 		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
1672 		hfi1_event_pkey_change(dd, port);
1673 	}
1674 
1675 	return 0;
1676 }
1677 
1678 static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
1679 				    struct ib_device *ibdev, u32 port,
1680 				    u32 *resp_len, u32 max_len)
1681 {
1682 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1683 	u32 n_blocks_sent = OPA_AM_NBLK(am);
1684 	u32 start_block = am & 0x7ff;
1685 	u16 *p = (u16 *)data;
1686 	__be16 *q = (__be16 *)data;
1687 	int i;
1688 	u16 n_blocks_avail;
1689 	unsigned npkeys = hfi1_get_npkeys(dd);
1690 	u32 size = 0;
1691 
1692 	if (n_blocks_sent == 0) {
1693 		pr_warn("OPA Get PKey AM Invalid : P = %u; B = 0x%x; N = 0x%x\n",
1694 			port, start_block, n_blocks_sent);
1695 		smp->status |= IB_SMP_INVALID_FIELD;
1696 		return reply((struct ib_mad_hdr *)smp);
1697 	}
1698 
1699 	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
1700 
1701 	size = sizeof(u16) * (n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE);
1702 
1703 	if (smp_length_check(size, max_len)) {
1704 		smp->status |= IB_SMP_INVALID_FIELD;
1705 		return reply((struct ib_mad_hdr *)smp);
1706 	}
1707 
1708 	if (start_block + n_blocks_sent > n_blocks_avail ||
1709 	    n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
1710 		pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
1711 			start_block, n_blocks_sent, n_blocks_avail,
1712 			OPA_NUM_PKEY_BLOCKS_PER_SMP);
1713 		smp->status |= IB_SMP_INVALID_FIELD;
1714 		return reply((struct ib_mad_hdr *)smp);
1715 	}
1716 
1717 	for (i = 0; i < n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE; i++)
1718 		p[i] = be16_to_cpu(q[i]);
1719 
1720 	if (start_block == 0 && set_pkeys(dd, port, p) != 0) {
1721 		smp->status |= IB_SMP_INVALID_FIELD;
1722 		return reply((struct ib_mad_hdr *)smp);
1723 	}
1724 
1725 	return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len,
1726 					max_len);
1727 }
1728 
1729 #define ILLEGAL_VL 12
1730 /*
1731  * filter_sc2vlt changes mappings to VL15 to ILLEGAL_VL (except
1732  * for SC15, which must map to VL15). If we don't remap things this
1733  * way it is possible for VL15 counters to increment when we try to
1734  * send on a SC which is mapped to an invalid VL.
1735  * When getting the table convert ILLEGAL_VL back to VL15.
1736  */
1737 static void filter_sc2vlt(void *data, bool set)
1738 {
1739 	int i;
1740 	u8 *pd = data;
1741 
1742 	for (i = 0; i < OPA_MAX_SCS; i++) {
1743 		if (i == 15)
1744 			continue;
1745 
1746 		if (set) {
1747 			if ((pd[i] & 0x1f) == 0xf)
1748 				pd[i] = ILLEGAL_VL;
1749 		} else {
1750 			if ((pd[i] & 0x1f) == ILLEGAL_VL)
1751 				pd[i] = 0xf;
1752 		}
1753 	}
1754 }
1755 
1756 static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1757 {
1758 	u64 *val = data;
1759 
1760 	filter_sc2vlt(data, true);
1761 
1762 	write_csr(dd, SEND_SC2VLT0, *val++);
1763 	write_csr(dd, SEND_SC2VLT1, *val++);
1764 	write_csr(dd, SEND_SC2VLT2, *val++);
1765 	write_csr(dd, SEND_SC2VLT3, *val++);
1766 	write_seqlock_irq(&dd->sc2vl_lock);
1767 	memcpy(dd->sc2vl, data, sizeof(dd->sc2vl));
1768 	write_sequnlock_irq(&dd->sc2vl_lock);
1769 	return 0;
1770 }
1771 
1772 static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1773 {
1774 	u64 *val = (u64 *)data;
1775 
1776 	*val++ = read_csr(dd, SEND_SC2VLT0);
1777 	*val++ = read_csr(dd, SEND_SC2VLT1);
1778 	*val++ = read_csr(dd, SEND_SC2VLT2);
1779 	*val++ = read_csr(dd, SEND_SC2VLT3);
1780 
1781 	filter_sc2vlt((u64 *)data, false);
1782 	return 0;
1783 }
1784 
1785 static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1786 				   struct ib_device *ibdev, u32 port,
1787 				   u32 *resp_len, u32 max_len)
1788 {
1789 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1790 	u8 *p = data;
1791 	size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
1792 	unsigned i;
1793 
1794 	if (am || smp_length_check(size, max_len)) {
1795 		smp->status |= IB_SMP_INVALID_FIELD;
1796 		return reply((struct ib_mad_hdr *)smp);
1797 	}
1798 
1799 	for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++)
1800 		*p++ = ibp->sl_to_sc[i];
1801 
1802 	if (resp_len)
1803 		*resp_len += size;
1804 
1805 	return reply((struct ib_mad_hdr *)smp);
1806 }
1807 
1808 static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1809 				   struct ib_device *ibdev, u32 port,
1810 				   u32 *resp_len, u32 max_len)
1811 {
1812 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1813 	u8 *p = data;
1814 	size_t size = ARRAY_SIZE(ibp->sl_to_sc);
1815 	int i;
1816 	u8 sc;
1817 
1818 	if (am || smp_length_check(size, max_len)) {
1819 		smp->status |= IB_SMP_INVALID_FIELD;
1820 		return reply((struct ib_mad_hdr *)smp);
1821 	}
1822 
1823 	for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++) {
1824 		sc = *p++;
1825 		if (ibp->sl_to_sc[i] != sc) {
1826 			ibp->sl_to_sc[i] = sc;
1827 
1828 			/* Put all stale qps into error state */
1829 			hfi1_error_port_qps(ibp, i);
1830 		}
1831 	}
1832 
1833 	return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len,
1834 				       max_len);
1835 }
1836 
1837 static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1838 				   struct ib_device *ibdev, u32 port,
1839 				   u32 *resp_len, u32 max_len)
1840 {
1841 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1842 	u8 *p = data;
1843 	size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
1844 	unsigned i;
1845 
1846 	if (am || smp_length_check(size, max_len)) {
1847 		smp->status |= IB_SMP_INVALID_FIELD;
1848 		return reply((struct ib_mad_hdr *)smp);
1849 	}
1850 
1851 	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1852 		*p++ = ibp->sc_to_sl[i];
1853 
1854 	if (resp_len)
1855 		*resp_len += size;
1856 
1857 	return reply((struct ib_mad_hdr *)smp);
1858 }
1859 
1860 static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1861 				   struct ib_device *ibdev, u32 port,
1862 				   u32 *resp_len, u32 max_len)
1863 {
1864 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1865 	size_t size = ARRAY_SIZE(ibp->sc_to_sl);
1866 	u8 *p = data;
1867 	int i;
1868 
1869 	if (am || smp_length_check(size, max_len)) {
1870 		smp->status |= IB_SMP_INVALID_FIELD;
1871 		return reply((struct ib_mad_hdr *)smp);
1872 	}
1873 
1874 	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1875 		ibp->sc_to_sl[i] = *p++;
1876 
1877 	return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len,
1878 				       max_len);
1879 }
1880 
1881 static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1882 				    struct ib_device *ibdev, u32 port,
1883 				    u32 *resp_len, u32 max_len)
1884 {
1885 	u32 n_blocks = OPA_AM_NBLK(am);
1886 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1887 	void *vp = (void *)data;
1888 	size_t size = 4 * sizeof(u64);
1889 
1890 	if (n_blocks != 1 || smp_length_check(size, max_len)) {
1891 		smp->status |= IB_SMP_INVALID_FIELD;
1892 		return reply((struct ib_mad_hdr *)smp);
1893 	}
1894 
1895 	get_sc2vlt_tables(dd, vp);
1896 
1897 	if (resp_len)
1898 		*resp_len += size;
1899 
1900 	return reply((struct ib_mad_hdr *)smp);
1901 }
1902 
1903 static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1904 				    struct ib_device *ibdev, u32 port,
1905 				    u32 *resp_len, u32 max_len)
1906 {
1907 	u32 n_blocks = OPA_AM_NBLK(am);
1908 	int async_update = OPA_AM_ASYNC(am);
1909 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1910 	void *vp = (void *)data;
1911 	struct hfi1_pportdata *ppd;
1912 	int lstate;
1913 	/*
1914 	 * set_sc2vlt_tables writes the information contained in *data
1915 	 * to four 64-bit registers SendSC2VLt[0-3]. We need to make
1916 	 * sure *max_len is not greater than the total size of the four
1917 	 * SendSC2VLt[0-3] registers.
1918 	 */
1919 	size_t size = 4 * sizeof(u64);
1920 
1921 	if (n_blocks != 1 || async_update || smp_length_check(size, max_len)) {
1922 		smp->status |= IB_SMP_INVALID_FIELD;
1923 		return reply((struct ib_mad_hdr *)smp);
1924 	}
1925 
1926 	/* IB numbers ports from 1, hw from 0 */
1927 	ppd = dd->pport + (port - 1);
1928 	lstate = driver_lstate(ppd);
1929 	/*
1930 	 * it's known that async_update is 0 by this point, but include
1931 	 * the explicit check for clarity
1932 	 */
1933 	if (!async_update &&
1934 	    (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
1935 		smp->status |= IB_SMP_INVALID_FIELD;
1936 		return reply((struct ib_mad_hdr *)smp);
1937 	}
1938 
1939 	set_sc2vlt_tables(dd, vp);
1940 
1941 	return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len,
1942 					max_len);
1943 }
1944 
1945 static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1946 				     struct ib_device *ibdev, u32 port,
1947 				     u32 *resp_len, u32 max_len)
1948 {
1949 	u32 n_blocks = OPA_AM_NPORT(am);
1950 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1951 	struct hfi1_pportdata *ppd;
1952 	void *vp = (void *)data;
1953 	int size = sizeof(struct sc2vlnt);
1954 
1955 	if (n_blocks != 1 || smp_length_check(size, max_len)) {
1956 		smp->status |= IB_SMP_INVALID_FIELD;
1957 		return reply((struct ib_mad_hdr *)smp);
1958 	}
1959 
1960 	ppd = dd->pport + (port - 1);
1961 
1962 	fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
1963 
1964 	if (resp_len)
1965 		*resp_len += size;
1966 
1967 	return reply((struct ib_mad_hdr *)smp);
1968 }
1969 
1970 static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1971 				     struct ib_device *ibdev, u32 port,
1972 				     u32 *resp_len, u32 max_len)
1973 {
1974 	u32 n_blocks = OPA_AM_NPORT(am);
1975 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1976 	struct hfi1_pportdata *ppd;
1977 	void *vp = (void *)data;
1978 	int lstate;
1979 	int size = sizeof(struct sc2vlnt);
1980 
1981 	if (n_blocks != 1 || smp_length_check(size, max_len)) {
1982 		smp->status |= IB_SMP_INVALID_FIELD;
1983 		return reply((struct ib_mad_hdr *)smp);
1984 	}
1985 
1986 	/* IB numbers ports from 1, hw from 0 */
1987 	ppd = dd->pport + (port - 1);
1988 	lstate = driver_lstate(ppd);
1989 	if (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE) {
1990 		smp->status |= IB_SMP_INVALID_FIELD;
1991 		return reply((struct ib_mad_hdr *)smp);
1992 	}
1993 
1994 	ppd = dd->pport + (port - 1);
1995 
1996 	fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
1997 
1998 	return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
1999 					 resp_len, max_len);
2000 }
2001 
2002 static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
2003 			      struct ib_device *ibdev, u32 port,
2004 			      u32 *resp_len, u32 max_len)
2005 {
2006 	u32 nports = OPA_AM_NPORT(am);
2007 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
2008 	u32 lstate;
2009 	struct hfi1_ibport *ibp;
2010 	struct hfi1_pportdata *ppd;
2011 	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
2012 
2013 	if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
2014 		smp->status |= IB_SMP_INVALID_FIELD;
2015 		return reply((struct ib_mad_hdr *)smp);
2016 	}
2017 
2018 	ibp = to_iport(ibdev, port);
2019 	ppd = ppd_from_ibp(ibp);
2020 
2021 	lstate = driver_lstate(ppd);
2022 
2023 	if (start_of_sm_config && (lstate == IB_PORT_INIT))
2024 		ppd->is_sm_config_started = 1;
2025 
2026 	psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
2027 	psi->port_states.ledenable_offlinereason |=
2028 		ppd->is_sm_config_started << 5;
2029 	psi->port_states.ledenable_offlinereason |=
2030 		ppd->offline_disabled_reason;
2031 
2032 	psi->port_states.portphysstate_portstate =
2033 		(driver_pstate(ppd) << 4) | (lstate & 0xf);
2034 	psi->link_width_downgrade_tx_active =
2035 		cpu_to_be16(ppd->link_width_downgrade_tx_active);
2036 	psi->link_width_downgrade_rx_active =
2037 		cpu_to_be16(ppd->link_width_downgrade_rx_active);
2038 	if (resp_len)
2039 		*resp_len += sizeof(struct opa_port_state_info);
2040 
2041 	return reply((struct ib_mad_hdr *)smp);
2042 }
2043 
2044 static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
2045 			      struct ib_device *ibdev, u32 port,
2046 			      u32 *resp_len, u32 max_len, int local_mad)
2047 {
2048 	u32 nports = OPA_AM_NPORT(am);
2049 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
2050 	u32 ls_old;
2051 	u8 ls_new, ps_new;
2052 	struct hfi1_ibport *ibp;
2053 	struct hfi1_pportdata *ppd;
2054 	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
2055 	int ret, invalid = 0;
2056 
2057 	if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
2058 		smp->status |= IB_SMP_INVALID_FIELD;
2059 		return reply((struct ib_mad_hdr *)smp);
2060 	}
2061 
2062 	ibp = to_iport(ibdev, port);
2063 	ppd = ppd_from_ibp(ibp);
2064 
2065 	ls_old = driver_lstate(ppd);
2066 
2067 	ls_new = port_states_to_logical_state(&psi->port_states);
2068 	ps_new = port_states_to_phys_state(&psi->port_states);
2069 
2070 	if (ls_old == IB_PORT_INIT) {
2071 		if (start_of_sm_config) {
2072 			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
2073 				ppd->is_sm_config_started = 1;
2074 		} else if (ls_new == IB_PORT_ARMED) {
2075 			if (ppd->is_sm_config_started == 0) {
2076 				invalid = 1;
2077 				smp->status |= IB_SMP_INVALID_FIELD;
2078 			}
2079 		}
2080 	}
2081 
2082 	if (!invalid) {
2083 		ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad);
2084 		if (ret)
2085 			return ret;
2086 	}
2087 
2088 	return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len,
2089 				  max_len);
2090 }
2091 
2092 static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
2093 				     struct ib_device *ibdev, u32 port,
2094 				     u32 *resp_len, u32 max_len)
2095 {
2096 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2097 	u32 addr = OPA_AM_CI_ADDR(am);
2098 	u32 len = OPA_AM_CI_LEN(am) + 1;
2099 	int ret;
2100 
2101 	if (dd->pport->port_type != PORT_TYPE_QSFP ||
2102 	    smp_length_check(len, max_len)) {
2103 		smp->status |= IB_SMP_INVALID_FIELD;
2104 		return reply((struct ib_mad_hdr *)smp);
2105 	}
2106 
2107 #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
2108 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
2109 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
2110 
2111 	/*
2112 	 * check that addr is within spec, and
2113 	 * addr and (addr + len - 1) are on the same "page"
2114 	 */
2115 	if (addr >= 4096 ||
2116 	    (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
2117 		smp->status |= IB_SMP_INVALID_FIELD;
2118 		return reply((struct ib_mad_hdr *)smp);
2119 	}
2120 
2121 	ret = get_cable_info(dd, port, addr, len, data);
2122 
2123 	if (ret == -ENODEV) {
2124 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
2125 		return reply((struct ib_mad_hdr *)smp);
2126 	}
2127 
2128 	/* The address range for the CableInfo SMA query is wider than the
2129 	 * memory available on the QSFP cable. We want to return a valid
2130 	 * response, albeit zeroed out, for address ranges beyond available
2131 	 * memory but that are within the CableInfo query spec
2132 	 */
2133 	if (ret < 0 && ret != -ERANGE) {
2134 		smp->status |= IB_SMP_INVALID_FIELD;
2135 		return reply((struct ib_mad_hdr *)smp);
2136 	}
2137 
2138 	if (resp_len)
2139 		*resp_len += len;
2140 
2141 	return reply((struct ib_mad_hdr *)smp);
2142 }
2143 
2144 static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
2145 			      struct ib_device *ibdev, u32 port, u32 *resp_len,
2146 			      u32 max_len)
2147 {
2148 	u32 num_ports = OPA_AM_NPORT(am);
2149 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2150 	struct hfi1_pportdata *ppd;
2151 	struct buffer_control *p = (struct buffer_control *)data;
2152 	int size = sizeof(struct buffer_control);
2153 
2154 	if (num_ports != 1 || smp_length_check(size, max_len)) {
2155 		smp->status |= IB_SMP_INVALID_FIELD;
2156 		return reply((struct ib_mad_hdr *)smp);
2157 	}
2158 
2159 	ppd = dd->pport + (port - 1);
2160 	fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
2161 	trace_bct_get(dd, p);
2162 	if (resp_len)
2163 		*resp_len += size;
2164 
2165 	return reply((struct ib_mad_hdr *)smp);
2166 }
2167 
2168 static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
2169 			      struct ib_device *ibdev, u32 port, u32 *resp_len,
2170 			      u32 max_len)
2171 {
2172 	u32 num_ports = OPA_AM_NPORT(am);
2173 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2174 	struct hfi1_pportdata *ppd;
2175 	struct buffer_control *p = (struct buffer_control *)data;
2176 
2177 	if (num_ports != 1 || smp_length_check(sizeof(*p), max_len)) {
2178 		smp->status |= IB_SMP_INVALID_FIELD;
2179 		return reply((struct ib_mad_hdr *)smp);
2180 	}
2181 	ppd = dd->pport + (port - 1);
2182 	trace_bct_set(dd, p);
2183 	if (fm_set_table(ppd, FM_TBL_BUFFER_CONTROL, p) < 0) {
2184 		smp->status |= IB_SMP_INVALID_FIELD;
2185 		return reply((struct ib_mad_hdr *)smp);
2186 	}
2187 
2188 	return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len,
2189 				  max_len);
2190 }
2191 
2192 static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
2193 				 struct ib_device *ibdev, u32 port,
2194 				 u32 *resp_len, u32 max_len)
2195 {
2196 	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
2197 	u32 num_ports = OPA_AM_NPORT(am);
2198 	u8 section = (am & 0x00ff0000) >> 16;
2199 	u8 *p = data;
2200 	int size = 256;
2201 
2202 	if (num_ports != 1 || smp_length_check(size, max_len)) {
2203 		smp->status |= IB_SMP_INVALID_FIELD;
2204 		return reply((struct ib_mad_hdr *)smp);
2205 	}
2206 
2207 	switch (section) {
2208 	case OPA_VLARB_LOW_ELEMENTS:
2209 		fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
2210 		break;
2211 	case OPA_VLARB_HIGH_ELEMENTS:
2212 		fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
2213 		break;
2214 	case OPA_VLARB_PREEMPT_ELEMENTS:
2215 		fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
2216 		break;
2217 	case OPA_VLARB_PREEMPT_MATRIX:
2218 		fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
2219 		break;
2220 	default:
2221 		pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
2222 			be32_to_cpu(smp->attr_mod));
2223 		smp->status |= IB_SMP_INVALID_FIELD;
2224 		size = 0;
2225 		break;
2226 	}
2227 
2228 	if (size > 0 && resp_len)
2229 		*resp_len += size;
2230 
2231 	return reply((struct ib_mad_hdr *)smp);
2232 }
2233 
2234 static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
2235 				 struct ib_device *ibdev, u32 port,
2236 				 u32 *resp_len, u32 max_len)
2237 {
2238 	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
2239 	u32 num_ports = OPA_AM_NPORT(am);
2240 	u8 section = (am & 0x00ff0000) >> 16;
2241 	u8 *p = data;
2242 	int size = 256;
2243 
2244 	if (num_ports != 1 || smp_length_check(size, max_len)) {
2245 		smp->status |= IB_SMP_INVALID_FIELD;
2246 		return reply((struct ib_mad_hdr *)smp);
2247 	}
2248 
2249 	switch (section) {
2250 	case OPA_VLARB_LOW_ELEMENTS:
2251 		(void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
2252 		break;
2253 	case OPA_VLARB_HIGH_ELEMENTS:
2254 		(void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
2255 		break;
2256 	/*
2257 	 * neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX
2258 	 * can be changed from the default values
2259 	 */
2260 	case OPA_VLARB_PREEMPT_ELEMENTS:
2261 	case OPA_VLARB_PREEMPT_MATRIX:
2262 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
2263 		break;
2264 	default:
2265 		pr_warn("OPA SubnSet(VL Arb) AM Invalid : 0x%x\n",
2266 			be32_to_cpu(smp->attr_mod));
2267 		smp->status |= IB_SMP_INVALID_FIELD;
2268 		break;
2269 	}
2270 
2271 	return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len,
2272 				     max_len);
2273 }
2274 
/*
 * MAD as seen by the pma_* handlers in this file: a struct ib_mad_hdr
 * followed by a 2024-byte attribute payload.  The handlers overlay their
 * request/response structures on @data and bound response sizes by
 * sizeof(data).  The structure is packed.
 */
struct opa_pma_mad {
	struct ib_mad_hdr mad_hdr;
	u8 data[2024];
} __packed;
2279 
/*
 * Request payload parsed by pma_get_opa_portstatus().
 *
 * @port_num:       port being queried; 0 is accepted and is answered with
 *                  the port the request arrived on, any other value must
 *                  match that port
 * @vl_select_mask: big-endian bitmask of the VLs whose per-VL counters are
 *                  wanted; bits outside VL_MASK_ALL are rejected
 */
struct opa_port_status_req {
	__u8 port_num;
	__u8 reserved[3];
	__be32 vl_select_mask;
};
2285 
/* Every VL bit a request may select: bits 0-7 and bit 15 */
#define VL_MASK_ALL		0x00000000000080ffUL
2287 
/*
 * Response payload built by pma_get_opa_portstatus().
 *
 * The fixed part carries the port-wide data and error counters in
 * big-endian form.  It is followed by one struct _vls_pctrs per bit set
 * in @vl_select_mask, in ascending bit order.
 */
struct opa_port_status_rsp {
	__u8 port_num;
	__u8 reserved[3];
	__be32  vl_select_mask;

	/* Data counters */
	__be64 port_xmit_data;
	__be64 port_rcv_data;
	__be64 port_xmit_pkts;
	__be64 port_rcv_pkts;
	__be64 port_multicast_xmit_pkts;
	__be64 port_multicast_rcv_pkts;
	__be64 port_xmit_wait;
	__be64 sw_port_congestion;
	__be64 port_rcv_fecn;
	__be64 port_rcv_becn;
	__be64 port_xmit_time_cong;
	__be64 port_xmit_wasted_bw;
	__be64 port_xmit_wait_data;
	__be64 port_rcv_bubble;
	__be64 port_mark_fecn;
	/* Error counters */
	__be64 port_rcv_constraint_errors;
	__be64 port_rcv_switch_relay_errors;
	__be64 port_xmit_discards;
	__be64 port_xmit_constraint_errors;
	__be64 port_rcv_remote_physical_errors;
	__be64 local_link_integrity_errors;
	__be64 port_rcv_errors;
	__be64 excessive_buffer_overruns;
	__be64 fm_config_errors;
	__be32 link_error_recovery;
	__be32 link_downed;
	/* 8 bits wide; the handler pegs it at 0xff */
	u8 uncorrectable_errors;

	u8 link_quality_indicator; /* 5res, 3bit */
	u8 res2[6];
	struct _vls_pctrs {
		/* per-VL Data counters */
		__be64 port_vl_xmit_data;
		__be64 port_vl_rcv_data;
		__be64 port_vl_xmit_pkts;
		__be64 port_vl_rcv_pkts;
		__be64 port_vl_xmit_wait;
		__be64 sw_port_vl_congestion;
		__be64 port_vl_rcv_fecn;
		__be64 port_vl_rcv_becn;
		__be64 port_xmit_time_cong;
		__be64 port_vl_xmit_wasted_bw;
		__be64 port_vl_xmit_wait_data;
		__be64 port_vl_rcv_bubble;
		__be64 port_vl_mark_fecn;
		__be64 port_vl_xmit_discards;
	} vls[]; /* real array size defined by # bits set in vl_select_mask */
};
2343 
/*
 * One selector bit per port counter, assigned from bit 31 downwards in the
 * same order as the counters appear in struct opa_port_status_rsp.
 *
 * NOTE(review): presumably tested against
 * opa_clear_port_status.counter_select_mask - confirm against the users,
 * which are outside this part of the file.
 * NOTE(review): (1 << 31) shifts into the sign bit of int, so
 * CS_PORT_XMIT_DATA is a negative enumerator; users rely on conversion to
 * an unsigned 32-bit type when masking.
 */
enum counter_selects {
	CS_PORT_XMIT_DATA			= (1 << 31),
	CS_PORT_RCV_DATA			= (1 << 30),
	CS_PORT_XMIT_PKTS			= (1 << 29),
	CS_PORT_RCV_PKTS			= (1 << 28),
	CS_PORT_MCAST_XMIT_PKTS			= (1 << 27),
	CS_PORT_MCAST_RCV_PKTS			= (1 << 26),
	CS_PORT_XMIT_WAIT			= (1 << 25),
	CS_SW_PORT_CONGESTION			= (1 << 24),
	CS_PORT_RCV_FECN			= (1 << 23),
	CS_PORT_RCV_BECN			= (1 << 22),
	CS_PORT_XMIT_TIME_CONG			= (1 << 21),
	CS_PORT_XMIT_WASTED_BW			= (1 << 20),
	CS_PORT_XMIT_WAIT_DATA			= (1 << 19),
	CS_PORT_RCV_BUBBLE			= (1 << 18),
	CS_PORT_MARK_FECN			= (1 << 17),
	CS_PORT_RCV_CONSTRAINT_ERRORS		= (1 << 16),
	CS_PORT_RCV_SWITCH_RELAY_ERRORS		= (1 << 15),
	CS_PORT_XMIT_DISCARDS			= (1 << 14),
	CS_PORT_XMIT_CONSTRAINT_ERRORS		= (1 << 13),
	CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS	= (1 << 12),
	CS_LOCAL_LINK_INTEGRITY_ERRORS		= (1 << 11),
	CS_PORT_RCV_ERRORS			= (1 << 10),
	CS_EXCESSIVE_BUFFER_OVERRUNS		= (1 << 9),
	CS_FM_CONFIG_ERRORS			= (1 << 8),
	CS_LINK_ERROR_RECOVERY			= (1 << 7),
	CS_LINK_DOWNED				= (1 << 6),
	CS_UNCORRECTABLE_ERRORS			= (1 << 5),
};
2373 
/*
 * Payload naming a set of ports (256-bit big-endian mask held in four
 * 64-bit words) and a set of counters.
 *
 * NOTE(review): counter_select_mask presumably holds enum counter_selects
 * bits - confirm against the clear-port-status handler, which is outside
 * this part of the file.
 */
struct opa_clear_port_status {
	__be64 port_select_mask[4];
	__be32 counter_select_mask;
};
2378 
/*
 * Header of one entry inside an aggregate request: attribute id, a packed
 * error/length field, attribute modifier, then the attribute's own data.
 *
 * NOTE(review): the code that walks these entries is outside this part of
 * the file; the unit of the request-length bits is not visible here.
 */
struct opa_aggregate {
	__be16 attr_id;
	__be16 err_reqlength;	/* 1 bit, 8 res, 7 bit */
	__be32 attr_mod;
	u8 data[];
};
2385 
/*
 * Decoding of the "resolution" word of a data counters request, as done
 * in pma_get_opa_datacounters(): the LLI resolution is taken from bits 7:4
 * and the LER resolution from bits 3:0.  A non-zero field has ADD_LLI /
 * ADD_LER added to it; the result is the right-shift applied to the
 * corresponding counter in get_error_counter_summary().
 */
#define MSK_LLI 0x000000f0
#define MSK_LLI_SFT 4
#define MSK_LER 0x0000000f
#define MSK_LER_SFT 0
#define ADD_LLI 8
#define ADD_LER 2
2392 
/*
 * Data counters message.
 *
 * Request contains first three fields, response contains those plus the rest.
 * The response is written in place over the request: the handler points
 * its response at the embedded @port member.  @port ends in a flexible
 * array with one struct _vls_dctrs per bit set in @vl_select_mask.
 */
struct opa_port_data_counters_msg {
	__be64 port_select_mask[4];
	__be32 vl_select_mask;
	__be32 resolution;

	/* Response fields follow */
	struct _port_dctrs {
		u8 port_number;
		u8 reserved2[3];
		__be32 link_quality_indicator; /* 29res, 3bit */

		/* Data counters */
		__be64 port_xmit_data;
		__be64 port_rcv_data;
		__be64 port_xmit_pkts;
		__be64 port_rcv_pkts;
		__be64 port_multicast_xmit_pkts;
		__be64 port_multicast_rcv_pkts;
		__be64 port_xmit_wait;
		__be64 sw_port_congestion;
		__be64 port_rcv_fecn;
		__be64 port_rcv_becn;
		__be64 port_xmit_time_cong;
		__be64 port_xmit_wasted_bw;
		__be64 port_xmit_wait_data;
		__be64 port_rcv_bubble;
		__be64 port_mark_fecn;

		__be64 port_error_counter_summary;
		/* Sum of error counts/port */

		struct _vls_dctrs {
			/* per-VL Data counters */
			__be64 port_vl_xmit_data;
			__be64 port_vl_rcv_data;
			__be64 port_vl_xmit_pkts;
			__be64 port_vl_rcv_pkts;
			__be64 port_vl_xmit_wait;
			__be64 sw_port_vl_congestion;
			__be64 port_vl_rcv_fecn;
			__be64 port_vl_rcv_becn;
			__be64 port_xmit_time_cong;
			__be64 port_vl_xmit_wasted_bw;
			__be64 port_vl_xmit_wait_data;
			__be64 port_vl_rcv_bubble;
			__be64 port_vl_mark_fecn;
		} vls[];
		/* array size defined by #bits set in vl_select_mask*/
	} port;
};
2444 
/*
 * Error counters message.  The port-wide error counters are followed by
 * one struct _vls_ectrs (per-VL transmit discards) for each bit set in
 * @vl_select_mask.
 */
struct opa_port_error_counters64_msg {
	/*
	 * Request contains first two fields, response contains the
	 * whole magilla
	 */
	__be64 port_select_mask[4];
	__be32 vl_select_mask;

	/* Response-only fields follow */
	__be32 reserved1;
	struct _port_ectrs {
		u8 port_number;
		u8 reserved2[7];
		__be64 port_rcv_constraint_errors;
		__be64 port_rcv_switch_relay_errors;
		__be64 port_xmit_discards;
		__be64 port_xmit_constraint_errors;
		__be64 port_rcv_remote_physical_errors;
		__be64 local_link_integrity_errors;
		__be64 port_rcv_errors;
		__be64 excessive_buffer_overruns;
		__be64 fm_config_errors;
		__be32 link_error_recovery;
		__be32 link_downed;
		u8 uncorrectable_errors;
		u8 reserved3[7];
		struct _vls_ectrs {
			__be64 port_vl_xmit_discards;
		} vls[];
		/* array size defined by #bits set in vl_select_mask */
	} port;
};
2477 
/*
 * Error info message: a port selection mask, a mask naming the error-info
 * records of interest (see enum error_info_selects), and one fixed-size
 * record per error category for the port.  Each category record is
 * individually packed.
 */
struct opa_port_error_info_msg {
	__be64 port_select_mask[4];
	__be32 error_info_select_mask;
	__be32 reserved1;
	struct _port_ei {
		u8 port_number;
		u8 reserved2[7];

		/* PortRcvErrorInfo */
		struct {
			u8 status_and_code;
			/* 17 raw bytes, viewed through one of two formats */
			union {
				u8 raw[17];
				struct {
					/* EI1to12 format */
					u8 packet_flit1[8];
					u8 packet_flit2[8];
					u8 remaining_flit_bits12;
				} ei1to12;
				struct {
					u8 packet_bytes[8];
					u8 remaining_flit_bits;
				} ei13;
			} ei;
			u8 reserved3[6];
		} __packed port_rcv_ei;

		/* ExcessiveBufferOverrunInfo */
		struct {
			u8 status_and_sc;
			u8 reserved4[7];
		} __packed excessive_buffer_overrun_ei;

		/* PortXmitConstraintErrorInfo */
		struct {
			u8 status;
			u8 reserved5;
			__be16 pkey;
			__be32 slid;
		} __packed port_xmit_constraint_ei;

		/* PortRcvConstraintErrorInfo */
		struct {
			u8 status;
			u8 reserved6;
			__be16 pkey;
			__be32 slid;
		} __packed port_rcv_constraint_ei;

		/* PortRcvSwitchRelayErrorInfo */
		struct {
			u8 status_and_code;
			u8 reserved7[3];
			__u32 error_info;
		} __packed port_rcv_switch_relay_ei;

		/* UncorrectableErrorInfo */
		struct {
			u8 status_and_code;
			u8 reserved8;
		} __packed uncorrectable_ei;

		/* FMConfigErrorInfo */
		struct {
			u8 status_and_code;
			u8 error_info;
		} __packed fm_config_ei;
		__u32 reserved9;
	} port;
};
2548 
/*
 * opa_port_error_info_msg error_info_select_mask bit definitions.
 *
 * One bit per error-info record, assigned from bit 31 downwards in the
 * order the records appear in struct _port_ei.
 *
 * NOTE(review): (1 << 31) shifts into the sign bit of int, so
 * ES_PORT_RCV_ERROR_INFO is a negative enumerator; users rely on
 * conversion to an unsigned 32-bit type when masking.
 */
enum error_info_selects {
	ES_PORT_RCV_ERROR_INFO			= (1 << 31),
	ES_EXCESSIVE_BUFFER_OVERRUN_INFO	= (1 << 30),
	ES_PORT_XMIT_CONSTRAINT_ERROR_INFO	= (1 << 29),
	ES_PORT_RCV_CONSTRAINT_ERROR_INFO	= (1 << 28),
	ES_PORT_RCV_SWITCH_RELAY_ERROR_INFO	= (1 << 27),
	ES_UNCORRECTABLE_ERROR_INFO		= (1 << 26),
	ES_FM_CONFIG_ERROR_INFO			= (1 << 25)
};
2559 
2560 static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
2561 				     struct ib_device *ibdev, u32 *resp_len)
2562 {
2563 	struct opa_class_port_info *p =
2564 		(struct opa_class_port_info *)pmp->data;
2565 
2566 	memset(pmp->data, 0, sizeof(pmp->data));
2567 
2568 	if (pmp->mad_hdr.attr_mod != 0)
2569 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2570 
2571 	p->base_version = OPA_MGMT_BASE_VERSION;
2572 	p->class_version = OPA_SM_CLASS_VERSION;
2573 	/*
2574 	 * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
2575 	 */
2576 	p->cap_mask2_resp_time = cpu_to_be32(18);
2577 
2578 	if (resp_len)
2579 		*resp_len += sizeof(*p);
2580 
2581 	return reply((struct ib_mad_hdr *)pmp);
2582 }
2583 
2584 static void a0_portstatus(struct hfi1_pportdata *ppd,
2585 			  struct opa_port_status_rsp *rsp)
2586 {
2587 	if (!is_bx(ppd->dd)) {
2588 		unsigned long vl;
2589 		u64 sum_vl_xmit_wait = 0;
2590 		unsigned long vl_all_mask = VL_MASK_ALL;
2591 
2592 		for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
2593 			u64 tmp = sum_vl_xmit_wait +
2594 				  read_port_cntr(ppd, C_TX_WAIT_VL,
2595 						 idx_from_vl(vl));
2596 			if (tmp < sum_vl_xmit_wait) {
2597 				/* we wrapped */
2598 				sum_vl_xmit_wait = (u64)~0;
2599 				break;
2600 			}
2601 			sum_vl_xmit_wait = tmp;
2602 		}
2603 		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2604 			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2605 	}
2606 }
2607 
2608 /**
2609  * tx_link_width - convert link width bitmask to integer
2610  * value representing actual link width.
2611  * @link_width: width of active link
2612  * @return: return index of the bit set in link_width var
2613  *
2614  * The function convert and return the index of bit set
2615  * that indicate the current link width.
2616  */
2617 u16 tx_link_width(u16 link_width)
2618 {
2619 	int n = LINK_WIDTH_DEFAULT;
2620 	u16 tx_width = n;
2621 
2622 	while (link_width && n) {
2623 		if (link_width & (1 << (n - 1))) {
2624 			tx_width = n;
2625 			break;
2626 		}
2627 		n--;
2628 	}
2629 
2630 	return tx_width;
2631 }
2632 
2633 /**
2634  * get_xmit_wait_counters - Convert HFI 's SendWaitCnt/SendWaitVlCnt
2635  * counter in unit of TXE cycle times to flit times.
2636  * @ppd: info of physical Hfi port
2637  * @link_width: width of active link
2638  * @link_speed: speed of active link
2639  * @vl: represent VL0-VL7, VL15 for PortVLXmitWait counters request
2640  * and if vl value is C_VL_COUNT, it represent SendWaitCnt
2641  * counter request
2642  * @return: return SendWaitCnt/SendWaitVlCnt counter value per vl.
2643  *
2644  * Convert SendWaitCnt/SendWaitVlCnt counter from TXE cycle times to
2645  * flit times. Call this function to samples these counters. This
2646  * function will calculate for previous state transition and update
2647  * current state at end of function using ppd->prev_link_width and
2648  * ppd->port_vl_xmit_wait_last to port_vl_xmit_wait_curr and link_width.
2649  */
2650 u64 get_xmit_wait_counters(struct hfi1_pportdata *ppd,
2651 			   u16 link_width, u16 link_speed, int vl)
2652 {
2653 	u64 port_vl_xmit_wait_curr;
2654 	u64 delta_vl_xmit_wait;
2655 	u64 xmit_wait_val;
2656 
2657 	if (vl > C_VL_COUNT)
2658 		return  0;
2659 	if (vl < C_VL_COUNT)
2660 		port_vl_xmit_wait_curr =
2661 			read_port_cntr(ppd, C_TX_WAIT_VL, vl);
2662 	else
2663 		port_vl_xmit_wait_curr =
2664 			read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL);
2665 
2666 	xmit_wait_val =
2667 		port_vl_xmit_wait_curr -
2668 		ppd->port_vl_xmit_wait_last[vl];
2669 	delta_vl_xmit_wait =
2670 		convert_xmit_counter(xmit_wait_val,
2671 				     ppd->prev_link_width,
2672 				     link_speed);
2673 
2674 	ppd->vl_xmit_flit_cnt[vl] += delta_vl_xmit_wait;
2675 	ppd->port_vl_xmit_wait_last[vl] = port_vl_xmit_wait_curr;
2676 	ppd->prev_link_width = link_width;
2677 
2678 	return ppd->vl_xmit_flit_cnt[vl];
2679 }
2680 
/*
 * Get handler for the port status attribute.
 *
 * The request (struct opa_port_status_req) is overlaid on pmp->data and
 * the response (struct opa_port_status_rsp) is built in the same buffer,
 * so every request field that is still needed is copied into a local
 * before the buffer is cleared.
 *
 * Replies with OPA_PM_STATUS_REQUEST_TOO_LARGE when the fixed response
 * plus one per-VL record for every selected VL would not fit in
 * pmp->data, and with IB_SMP_INVALID_FIELD when the request is not for
 * exactly one port, names a port other than the one it arrived on, or
 * selects VLs outside VL_MASK_ALL.
 *
 * On success *resp_len (when non-NULL) grows by the size of the response
 * actually produced.
 */
static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
				  struct ib_device *ibdev,
				  u32 port, u32 *resp_len)
{
	struct opa_port_status_req *req =
		(struct opa_port_status_req *)pmp->data;
	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
	struct opa_port_status_rsp *rsp;
	unsigned long vl_select_mask = be32_to_cpu(req->vl_select_mask);
	unsigned long vl;
	size_t response_data_size;
	/* the port count lives in the top byte of the attribute modifier */
	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
	u32 port_num = req->port_num;
	u8 num_vls = hweight64(vl_select_mask);
	struct _vls_pctrs *vlinfo;
	struct hfi1_ibport *ibp = to_iport(ibdev, port);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	int vfi;
	u64 tmp, tmp2;
	u16 link_width;
	u16 link_speed;

	/* fixed header plus one per-VL record for each selected VL */
	response_data_size = struct_size(rsp, vls, num_vls);
	if (response_data_size > sizeof(pmp->data)) {
		pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE;
		return reply((struct ib_mad_hdr *)pmp);
	}

	/* port_num 0 is allowed and means "the port this arrived on" */
	if (nports != 1 || (port_num && port_num != port) ||
	    num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
		return reply((struct ib_mad_hdr *)pmp);
	}

	/* the request has been fully read; the buffer now becomes the reply */
	memset(pmp->data, 0, sizeof(pmp->data));

	rsp = (struct opa_port_status_rsp *)pmp->data;
	if (port_num)
		rsp->port_num = port_num;
	else
		rsp->port_num = port;

	rsp->port_rcv_constraint_errors =
		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
					   CNTR_INVALID_VL));

	hfi1_read_link_quality(dd, &rsp->link_quality_indicator);

	rsp->vl_select_mask = cpu_to_be32((u32)vl_select_mask);
	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
					  CNTR_INVALID_VL));
	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
					 CNTR_INVALID_VL));
	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
					  CNTR_INVALID_VL));
	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
					 CNTR_INVALID_VL));
	rsp->port_multicast_xmit_pkts =
		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
					  CNTR_INVALID_VL));
	rsp->port_multicast_rcv_pkts =
		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
					  CNTR_INVALID_VL));
	/*
	 * Convert PortXmitWait counter from TXE cycle times
	 * to flit times.
	 */
	link_width =
		tx_link_width(ppd->link_width_downgrade_tx_active);
	link_speed = get_link_speed(ppd->link_speed_active);
	/* C_VL_COUNT selects the port-wide wait counter, not a per-VL one */
	rsp->port_xmit_wait =
		cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
						   link_speed, C_VL_COUNT));
	rsp->port_rcv_fecn =
		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
	rsp->port_rcv_becn =
		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
	rsp->port_xmit_discards =
		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
					   CNTR_INVALID_VL));
	rsp->port_xmit_constraint_errors =
		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
					   CNTR_INVALID_VL));
	rsp->port_rcv_remote_physical_errors =
		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
					  CNTR_INVALID_VL));
	rsp->local_link_integrity_errors =
		cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
					  CNTR_INVALID_VL));
	/*
	 * link_error_recovery is the sum of two device counters, reported
	 * in 32 bits and pegged at all-ones when it does not fit.
	 */
	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
				   CNTR_INVALID_VL);
	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
		/* overflow/wrapped */
		rsp->link_error_recovery = cpu_to_be32(~0);
	} else {
		rsp->link_error_recovery = cpu_to_be32(tmp2);
	}
	rsp->port_rcv_errors =
		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
	rsp->excessive_buffer_overruns =
		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
	rsp->fm_config_errors =
		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
					  CNTR_INVALID_VL));
	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
						      CNTR_INVALID_VL));

	/* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;

	/* vlinfo and vfi advance together over the per-VL response records */
	vlinfo = &rsp->vls[0];
	vfi = 0;
	/* The vl_select_mask has been checked above, and we know
	 * that it contains only entries which represent valid VLs.
	 * So in the for_each_set_bit() loop below, we don't need
	 * any additional checks for vl.
	 */
	for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
		memset(vlinfo, 0, sizeof(*vlinfo));

		tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
		rsp->vls[vfi].port_vl_rcv_data = cpu_to_be64(tmp);

		rsp->vls[vfi].port_vl_rcv_pkts =
			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
						  idx_from_vl(vl)));

		rsp->vls[vfi].port_vl_xmit_data =
			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
						   idx_from_vl(vl)));

		rsp->vls[vfi].port_vl_xmit_pkts =
			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
						   idx_from_vl(vl)));
		/*
		 * Convert PortVlXmitWait counter from TXE cycle
		 * times to flit times.
		 */
		rsp->vls[vfi].port_vl_xmit_wait =
			cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
							   link_speed,
							   idx_from_vl(vl)));

		rsp->vls[vfi].port_vl_rcv_fecn =
			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
						  idx_from_vl(vl)));

		rsp->vls[vfi].port_vl_rcv_becn =
			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
						  idx_from_vl(vl)));

		rsp->vls[vfi].port_vl_xmit_discards =
			cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
						   idx_from_vl(vl)));
		vlinfo++;
		vfi++;
	}

	/* may lower port_xmit_wait when is_bx() is false for the device */
	a0_portstatus(ppd, rsp);

	if (resp_len)
		*resp_len += response_data_size;

	return reply((struct ib_mad_hdr *)pmp);
}
2848 
2849 static u64 get_error_counter_summary(struct ib_device *ibdev, u32 port,
2850 				     u8 res_lli, u8 res_ler)
2851 {
2852 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2853 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2854 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2855 	u64 error_counter_summary = 0, tmp;
2856 
2857 	error_counter_summary += read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2858 						CNTR_INVALID_VL);
2859 	/* port_rcv_switch_relay_errors is 0 for HFIs */
2860 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_DSCD,
2861 						CNTR_INVALID_VL);
2862 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2863 						CNTR_INVALID_VL);
2864 	error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2865 					       CNTR_INVALID_VL);
2866 	/* local link integrity must be right-shifted by the lli resolution */
2867 	error_counter_summary += (read_dev_cntr(dd, C_DC_RX_REPLAY,
2868 						CNTR_INVALID_VL) >> res_lli);
2869 	/* link error recovery must b right-shifted by the ler resolution */
2870 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2871 	tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
2872 	error_counter_summary += (tmp >> res_ler);
2873 	error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
2874 					       CNTR_INVALID_VL);
2875 	error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2876 	error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2877 					       CNTR_INVALID_VL);
2878 	/* ppd->link_downed is a 32-bit value */
2879 	error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
2880 						CNTR_INVALID_VL);
2881 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2882 	/* this is an 8-bit quantity */
2883 	error_counter_summary += tmp < 0x100 ? (tmp & 0xff) : 0xff;
2884 
2885 	return error_counter_summary;
2886 }
2887 
2888 static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp)
2889 {
2890 	if (!is_bx(ppd->dd)) {
2891 		unsigned long vl;
2892 		u64 sum_vl_xmit_wait = 0;
2893 		unsigned long vl_all_mask = VL_MASK_ALL;
2894 
2895 		for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
2896 			u64 tmp = sum_vl_xmit_wait +
2897 				  read_port_cntr(ppd, C_TX_WAIT_VL,
2898 						 idx_from_vl(vl));
2899 			if (tmp < sum_vl_xmit_wait) {
2900 				/* we wrapped */
2901 				sum_vl_xmit_wait = (u64)~0;
2902 				break;
2903 			}
2904 			sum_vl_xmit_wait = tmp;
2905 		}
2906 		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2907 			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2908 	}
2909 }
2910 
2911 static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
2912 				   struct _port_dctrs *rsp)
2913 {
2914 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2915 
2916 	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2917 						CNTR_INVALID_VL));
2918 	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2919 						CNTR_INVALID_VL));
2920 	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2921 						CNTR_INVALID_VL));
2922 	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2923 						CNTR_INVALID_VL));
2924 	rsp->port_multicast_xmit_pkts =
2925 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2926 					  CNTR_INVALID_VL));
2927 	rsp->port_multicast_rcv_pkts =
2928 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2929 					  CNTR_INVALID_VL));
2930 }
2931 
2932 static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
2933 				    struct ib_device *ibdev,
2934 				    u32 port, u32 *resp_len)
2935 {
2936 	struct opa_port_data_counters_msg *req =
2937 		(struct opa_port_data_counters_msg *)pmp->data;
2938 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2939 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2940 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2941 	struct _port_dctrs *rsp;
2942 	struct _vls_dctrs *vlinfo;
2943 	size_t response_data_size;
2944 	u32 num_ports;
2945 	u8 lq, num_vls;
2946 	u8 res_lli, res_ler;
2947 	u64 port_mask;
2948 	u32 port_num;
2949 	unsigned long vl;
2950 	unsigned long vl_select_mask;
2951 	int vfi;
2952 	u16 link_width;
2953 	u16 link_speed;
2954 
2955 	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2956 	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2957 	vl_select_mask = be32_to_cpu(req->vl_select_mask);
2958 	res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT;
2959 	res_lli = res_lli ? res_lli + ADD_LLI : 0;
2960 	res_ler = (u8)(be32_to_cpu(req->resolution) & MSK_LER) >> MSK_LER_SFT;
2961 	res_ler = res_ler ? res_ler + ADD_LER : 0;
2962 
2963 	if (num_ports != 1 || (vl_select_mask & ~VL_MASK_ALL)) {
2964 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2965 		return reply((struct ib_mad_hdr *)pmp);
2966 	}
2967 
2968 	/* Sanity check */
2969 	response_data_size = struct_size(req, port.vls, num_vls);
2970 
2971 	if (response_data_size > sizeof(pmp->data)) {
2972 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2973 		return reply((struct ib_mad_hdr *)pmp);
2974 	}
2975 
2976 	/*
2977 	 * The bit set in the mask needs to be consistent with the
2978 	 * port the request came in on.
2979 	 */
2980 	port_mask = be64_to_cpu(req->port_select_mask[3]);
2981 	port_num = find_first_bit((unsigned long *)&port_mask,
2982 				  sizeof(port_mask) * 8);
2983 
2984 	if (port_num != port) {
2985 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2986 		return reply((struct ib_mad_hdr *)pmp);
2987 	}
2988 
2989 	rsp = &req->port;
2990 	memset(rsp, 0, sizeof(*rsp));
2991 
2992 	rsp->port_number = port;
2993 	/*
2994 	 * Note that link_quality_indicator is a 32 bit quantity in
2995 	 * 'datacounters' queries (as opposed to 'portinfo' queries,
2996 	 * where it's a byte).
2997 	 */
2998 	hfi1_read_link_quality(dd, &lq);
2999 	rsp->link_quality_indicator = cpu_to_be32((u32)lq);
3000 	pma_get_opa_port_dctrs(ibdev, rsp);
3001 
3002 	/*
3003 	 * Convert PortXmitWait counter from TXE
3004 	 * cycle times to flit times.
3005 	 */
3006 	link_width =
3007 		tx_link_width(ppd->link_width_downgrade_tx_active);
3008 	link_speed = get_link_speed(ppd->link_speed_active);
3009 	rsp->port_xmit_wait =
3010 		cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
3011 						   link_speed, C_VL_COUNT));
3012 	rsp->port_rcv_fecn =
3013 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
3014 	rsp->port_rcv_becn =
3015 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
3016 	rsp->port_error_counter_summary =
3017 		cpu_to_be64(get_error_counter_summary(ibdev, port,
3018 						      res_lli, res_ler));
3019 
3020 	vlinfo = &rsp->vls[0];
3021 	vfi = 0;
3022 	/* The vl_select_mask has been checked above, and we know
3023 	 * that it contains only entries which represent valid VLs.
3024 	 * So in the for_each_set_bit() loop below, we don't need
3025 	 * any additional checks for vl.
3026 	 */
3027 	for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
3028 		memset(vlinfo, 0, sizeof(*vlinfo));
3029 
3030 		rsp->vls[vfi].port_vl_xmit_data =
3031 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
3032 						   idx_from_vl(vl)));
3033 
3034 		rsp->vls[vfi].port_vl_rcv_data =
3035 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
3036 						  idx_from_vl(vl)));
3037 
3038 		rsp->vls[vfi].port_vl_xmit_pkts =
3039 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
3040 						   idx_from_vl(vl)));
3041 
3042 		rsp->vls[vfi].port_vl_rcv_pkts =
3043 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
3044 						  idx_from_vl(vl)));
3045 
3046 		/*
3047 		 * Convert PortVlXmitWait counter from TXE
3048 		 * cycle times to flit times.
3049 		 */
3050 		rsp->vls[vfi].port_vl_xmit_wait =
3051 			cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
3052 							   link_speed,
3053 							   idx_from_vl(vl)));
3054 
3055 		rsp->vls[vfi].port_vl_rcv_fecn =
3056 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
3057 						  idx_from_vl(vl)));
3058 		rsp->vls[vfi].port_vl_rcv_becn =
3059 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
3060 						  idx_from_vl(vl)));
3061 
3062 		/* rsp->port_vl_xmit_time_cong is 0 for HFIs */
3063 		/* rsp->port_vl_xmit_wasted_bw ??? */
3064 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
3065 		 * does this differ from rsp->vls[vfi].port_vl_xmit_wait
3066 		 */
3067 		/*rsp->vls[vfi].port_vl_mark_fecn =
3068 		 *	cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
3069 		 *		+ offset));
3070 		 */
3071 		vlinfo++;
3072 		vfi++;
3073 	}
3074 
3075 	a0_datacounters(ppd, rsp);
3076 
3077 	if (resp_len)
3078 		*resp_len += response_data_size;
3079 
3080 	return reply((struct ib_mad_hdr *)pmp);
3081 }
3082 
3083 static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
3084 				       struct ib_device *ibdev, u32 port)
3085 {
3086 	struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
3087 						pmp->data;
3088 	struct _port_dctrs rsp;
3089 
3090 	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
3091 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3092 		goto bail;
3093 	}
3094 
3095 	memset(&rsp, 0, sizeof(rsp));
3096 	pma_get_opa_port_dctrs(ibdev, &rsp);
3097 
3098 	p->port_xmit_data = rsp.port_xmit_data;
3099 	p->port_rcv_data = rsp.port_rcv_data;
3100 	p->port_xmit_packets = rsp.port_xmit_pkts;
3101 	p->port_rcv_packets = rsp.port_rcv_pkts;
3102 	p->port_unicast_xmit_packets = 0;
3103 	p->port_unicast_rcv_packets =  0;
3104 	p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
3105 	p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
3106 
3107 bail:
3108 	return reply((struct ib_mad_hdr *)pmp);
3109 }
3110 
3111 static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
3112 				   struct _port_ectrs *rsp, u32 port)
3113 {
3114 	u64 tmp, tmp2;
3115 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3116 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3117 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3118 
3119 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
3120 	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3121 					CNTR_INVALID_VL);
3122 	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
3123 		/* overflow/wrapped */
3124 		rsp->link_error_recovery = cpu_to_be32(~0);
3125 	} else {
3126 		rsp->link_error_recovery = cpu_to_be32(tmp2);
3127 	}
3128 
3129 	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
3130 						CNTR_INVALID_VL));
3131 	rsp->port_rcv_errors =
3132 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
3133 	rsp->port_rcv_remote_physical_errors =
3134 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
3135 					  CNTR_INVALID_VL));
3136 	rsp->port_rcv_switch_relay_errors = 0;
3137 	rsp->port_xmit_discards =
3138 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
3139 					   CNTR_INVALID_VL));
3140 	rsp->port_xmit_constraint_errors =
3141 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
3142 					   CNTR_INVALID_VL));
3143 	rsp->port_rcv_constraint_errors =
3144 		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
3145 					   CNTR_INVALID_VL));
3146 	rsp->local_link_integrity_errors =
3147 		cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
3148 					  CNTR_INVALID_VL));
3149 	rsp->excessive_buffer_overruns =
3150 		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
3151 }
3152 
3153 static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
3154 				  struct ib_device *ibdev,
3155 				  u32 port, u32 *resp_len)
3156 {
3157 	size_t response_data_size;
3158 	struct _port_ectrs *rsp;
3159 	u32 port_num;
3160 	struct opa_port_error_counters64_msg *req;
3161 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3162 	u32 num_ports;
3163 	u8 num_pslm;
3164 	u8 num_vls;
3165 	struct hfi1_ibport *ibp;
3166 	struct hfi1_pportdata *ppd;
3167 	struct _vls_ectrs *vlinfo;
3168 	unsigned long vl;
3169 	u64 port_mask, tmp;
3170 	unsigned long vl_select_mask;
3171 	int vfi;
3172 
3173 	req = (struct opa_port_error_counters64_msg *)pmp->data;
3174 
3175 	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
3176 
3177 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3178 	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
3179 
3180 	if (num_ports != 1 || num_ports != num_pslm) {
3181 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3182 		return reply((struct ib_mad_hdr *)pmp);
3183 	}
3184 
3185 	response_data_size = struct_size(req, port.vls, num_vls);
3186 
3187 	if (response_data_size > sizeof(pmp->data)) {
3188 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3189 		return reply((struct ib_mad_hdr *)pmp);
3190 	}
3191 	/*
3192 	 * The bit set in the mask needs to be consistent with the
3193 	 * port the request came in on.
3194 	 */
3195 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3196 	port_num = find_first_bit((unsigned long *)&port_mask,
3197 				  sizeof(port_mask) * 8);
3198 
3199 	if (port_num != port) {
3200 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3201 		return reply((struct ib_mad_hdr *)pmp);
3202 	}
3203 
3204 	rsp = &req->port;
3205 
3206 	ibp = to_iport(ibdev, port_num);
3207 	ppd = ppd_from_ibp(ibp);
3208 
3209 	memset(rsp, 0, sizeof(*rsp));
3210 	rsp->port_number = port_num;
3211 
3212 	pma_get_opa_port_ectrs(ibdev, rsp, port_num);
3213 
3214 	rsp->port_rcv_remote_physical_errors =
3215 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
3216 					  CNTR_INVALID_VL));
3217 	rsp->fm_config_errors =
3218 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
3219 					  CNTR_INVALID_VL));
3220 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
3221 
3222 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
3223 	rsp->port_rcv_errors =
3224 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
3225 	vlinfo = &rsp->vls[0];
3226 	vfi = 0;
3227 	vl_select_mask = be32_to_cpu(req->vl_select_mask);
3228 	for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
3229 		memset(vlinfo, 0, sizeof(*vlinfo));
3230 		rsp->vls[vfi].port_vl_xmit_discards =
3231 			cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
3232 						   idx_from_vl(vl)));
3233 		vlinfo += 1;
3234 		vfi++;
3235 	}
3236 
3237 	if (resp_len)
3238 		*resp_len += response_data_size;
3239 
3240 	return reply((struct ib_mad_hdr *)pmp);
3241 }
3242 
3243 static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
3244 				   struct ib_device *ibdev, u32 port)
3245 {
3246 	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
3247 		pmp->data;
3248 	struct _port_ectrs rsp;
3249 	u64 temp_link_overrun_errors;
3250 	u64 temp_64;
3251 	u32 temp_32;
3252 
3253 	memset(&rsp, 0, sizeof(rsp));
3254 	pma_get_opa_port_ectrs(ibdev, &rsp, port);
3255 
3256 	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
3257 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3258 		goto bail;
3259 	}
3260 
3261 	p->symbol_error_counter = 0; /* N/A for OPA */
3262 
3263 	temp_32 = be32_to_cpu(rsp.link_error_recovery);
3264 	if (temp_32 > 0xFFUL)
3265 		p->link_error_recovery_counter = 0xFF;
3266 	else
3267 		p->link_error_recovery_counter = (u8)temp_32;
3268 
3269 	temp_32 = be32_to_cpu(rsp.link_downed);
3270 	if (temp_32 > 0xFFUL)
3271 		p->link_downed_counter = 0xFF;
3272 	else
3273 		p->link_downed_counter = (u8)temp_32;
3274 
3275 	temp_64 = be64_to_cpu(rsp.port_rcv_errors);
3276 	if (temp_64 > 0xFFFFUL)
3277 		p->port_rcv_errors = cpu_to_be16(0xFFFF);
3278 	else
3279 		p->port_rcv_errors = cpu_to_be16((u16)temp_64);
3280 
3281 	temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
3282 	if (temp_64 > 0xFFFFUL)
3283 		p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
3284 	else
3285 		p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
3286 
3287 	temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
3288 	p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
3289 
3290 	temp_64 = be64_to_cpu(rsp.port_xmit_discards);
3291 	if (temp_64 > 0xFFFFUL)
3292 		p->port_xmit_discards = cpu_to_be16(0xFFFF);
3293 	else
3294 		p->port_xmit_discards = cpu_to_be16((u16)temp_64);
3295 
3296 	temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
3297 	if (temp_64 > 0xFFUL)
3298 		p->port_xmit_constraint_errors = 0xFF;
3299 	else
3300 		p->port_xmit_constraint_errors = (u8)temp_64;
3301 
3302 	temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
3303 	if (temp_64 > 0xFFUL)
3304 		p->port_rcv_constraint_errors = 0xFFUL;
3305 	else
3306 		p->port_rcv_constraint_errors = (u8)temp_64;
3307 
3308 	/* LocalLink: 7:4, BufferOverrun: 3:0 */
3309 	temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
3310 	if (temp_64 > 0xFUL)
3311 		temp_64 = 0xFUL;
3312 
3313 	temp_link_overrun_errors = temp_64 << 4;
3314 
3315 	temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
3316 	if (temp_64 > 0xFUL)
3317 		temp_64 = 0xFUL;
3318 	temp_link_overrun_errors |= temp_64;
3319 
3320 	p->link_overrun_errors = (u8)temp_link_overrun_errors;
3321 
3322 	p->vl15_dropped = 0; /* N/A for OPA */
3323 
3324 bail:
3325 	return reply((struct ib_mad_hdr *)pmp);
3326 }
3327 
3328 static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
3329 				 struct ib_device *ibdev,
3330 				 u32 port, u32 *resp_len)
3331 {
3332 	size_t response_data_size;
3333 	struct _port_ei *rsp;
3334 	struct opa_port_error_info_msg *req;
3335 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3336 	u64 port_mask;
3337 	u32 num_ports;
3338 	u32 port_num;
3339 	u8 num_pslm;
3340 	u64 reg;
3341 
3342 	req = (struct opa_port_error_info_msg *)pmp->data;
3343 	rsp = &req->port;
3344 
3345 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3346 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3347 
3348 	memset(rsp, 0, sizeof(*rsp));
3349 
3350 	if (num_ports != 1 || num_ports != num_pslm) {
3351 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3352 		return reply((struct ib_mad_hdr *)pmp);
3353 	}
3354 
3355 	/* Sanity check */
3356 	response_data_size = sizeof(struct opa_port_error_info_msg);
3357 
3358 	if (response_data_size > sizeof(pmp->data)) {
3359 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3360 		return reply((struct ib_mad_hdr *)pmp);
3361 	}
3362 
3363 	/*
3364 	 * The bit set in the mask needs to be consistent with the port
3365 	 * the request came in on.
3366 	 */
3367 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3368 	port_num = find_first_bit((unsigned long *)&port_mask,
3369 				  sizeof(port_mask) * 8);
3370 
3371 	if (port_num != port) {
3372 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3373 		return reply((struct ib_mad_hdr *)pmp);
3374 	}
3375 	rsp->port_number = port;
3376 
3377 	/* PortRcvErrorInfo */
3378 	rsp->port_rcv_ei.status_and_code =
3379 		dd->err_info_rcvport.status_and_code;
3380 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
3381 	       &dd->err_info_rcvport.packet_flit1, sizeof(u64));
3382 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
3383 	       &dd->err_info_rcvport.packet_flit2, sizeof(u64));
3384 
3385 	/* ExcessiverBufferOverrunInfo */
3386 	reg = read_csr(dd, RCV_ERR_INFO);
3387 	if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
3388 		/*
3389 		 * if the RcvExcessBufferOverrun bit is set, save SC of
3390 		 * first pkt that encountered an excess buffer overrun
3391 		 */
3392 		u8 tmp = (u8)reg;
3393 
3394 		tmp &=  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
3395 		tmp <<= 2;
3396 		rsp->excessive_buffer_overrun_ei.status_and_sc = tmp;
3397 		/* set the status bit */
3398 		rsp->excessive_buffer_overrun_ei.status_and_sc |= 0x80;
3399 	}
3400 
3401 	rsp->port_xmit_constraint_ei.status =
3402 		dd->err_info_xmit_constraint.status;
3403 	rsp->port_xmit_constraint_ei.pkey =
3404 		cpu_to_be16(dd->err_info_xmit_constraint.pkey);
3405 	rsp->port_xmit_constraint_ei.slid =
3406 		cpu_to_be32(dd->err_info_xmit_constraint.slid);
3407 
3408 	rsp->port_rcv_constraint_ei.status =
3409 		dd->err_info_rcv_constraint.status;
3410 	rsp->port_rcv_constraint_ei.pkey =
3411 		cpu_to_be16(dd->err_info_rcv_constraint.pkey);
3412 	rsp->port_rcv_constraint_ei.slid =
3413 		cpu_to_be32(dd->err_info_rcv_constraint.slid);
3414 
3415 	/* UncorrectableErrorInfo */
3416 	rsp->uncorrectable_ei.status_and_code = dd->err_info_uncorrectable;
3417 
3418 	/* FMConfigErrorInfo */
3419 	rsp->fm_config_ei.status_and_code = dd->err_info_fmconfig;
3420 
3421 	if (resp_len)
3422 		*resp_len += response_data_size;
3423 
3424 	return reply((struct ib_mad_hdr *)pmp);
3425 }
3426 
3427 static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
3428 				  struct ib_device *ibdev,
3429 				  u32 port, u32 *resp_len)
3430 {
3431 	struct opa_clear_port_status *req =
3432 		(struct opa_clear_port_status *)pmp->data;
3433 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3434 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3435 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3436 	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
3437 	u64 portn = be64_to_cpu(req->port_select_mask[3]);
3438 	u32 counter_select = be32_to_cpu(req->counter_select_mask);
3439 	unsigned long vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
3440 	unsigned long vl;
3441 
3442 	if ((nports != 1) || (portn != 1 << port)) {
3443 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3444 		return reply((struct ib_mad_hdr *)pmp);
3445 	}
3446 	/*
3447 	 * only counters returned by pma_get_opa_portstatus() are
3448 	 * handled, so when pma_get_opa_portstatus() gets a fix,
3449 	 * the corresponding change should be made here as well.
3450 	 */
3451 
3452 	if (counter_select & CS_PORT_XMIT_DATA)
3453 		write_dev_cntr(dd, C_DC_XMIT_FLITS, CNTR_INVALID_VL, 0);
3454 
3455 	if (counter_select & CS_PORT_RCV_DATA)
3456 		write_dev_cntr(dd, C_DC_RCV_FLITS, CNTR_INVALID_VL, 0);
3457 
3458 	if (counter_select & CS_PORT_XMIT_PKTS)
3459 		write_dev_cntr(dd, C_DC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3460 
3461 	if (counter_select & CS_PORT_RCV_PKTS)
3462 		write_dev_cntr(dd, C_DC_RCV_PKTS, CNTR_INVALID_VL, 0);
3463 
3464 	if (counter_select & CS_PORT_MCAST_XMIT_PKTS)
3465 		write_dev_cntr(dd, C_DC_MC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3466 
3467 	if (counter_select & CS_PORT_MCAST_RCV_PKTS)
3468 		write_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL, 0);
3469 
3470 	if (counter_select & CS_PORT_XMIT_WAIT) {
3471 		write_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL, 0);
3472 		ppd->port_vl_xmit_wait_last[C_VL_COUNT] = 0;
3473 		ppd->vl_xmit_flit_cnt[C_VL_COUNT] = 0;
3474 	}
3475 	/* ignore cs_sw_portCongestion for HFIs */
3476 
3477 	if (counter_select & CS_PORT_RCV_FECN)
3478 		write_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL, 0);
3479 
3480 	if (counter_select & CS_PORT_RCV_BECN)
3481 		write_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL, 0);
3482 
3483 	/* ignore cs_port_xmit_time_cong for HFIs */
3484 	/* ignore cs_port_xmit_wasted_bw for now */
3485 	/* ignore cs_port_xmit_wait_data for now */
3486 	if (counter_select & CS_PORT_RCV_BUBBLE)
3487 		write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
3488 
3489 	/* Only applicable for switch */
3490 	/* if (counter_select & CS_PORT_MARK_FECN)
3491 	 *	write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
3492 	 */
3493 
3494 	if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
3495 		write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
3496 
3497 	/* ignore cs_port_rcv_switch_relay_errors for HFIs */
3498 	if (counter_select & CS_PORT_XMIT_DISCARDS)
3499 		write_port_cntr(ppd, C_SW_XMIT_DSCD, CNTR_INVALID_VL, 0);
3500 
3501 	if (counter_select & CS_PORT_XMIT_CONSTRAINT_ERRORS)
3502 		write_port_cntr(ppd, C_SW_XMIT_CSTR_ERR, CNTR_INVALID_VL, 0);
3503 
3504 	if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS)
3505 		write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0);
3506 
3507 	if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS)
3508 		write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3509 
3510 	if (counter_select & CS_LINK_ERROR_RECOVERY) {
3511 		write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3512 		write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3513 			       CNTR_INVALID_VL, 0);
3514 	}
3515 
3516 	if (counter_select & CS_PORT_RCV_ERRORS)
3517 		write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3518 
3519 	if (counter_select & CS_EXCESSIVE_BUFFER_OVERRUNS) {
3520 		write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3521 		dd->rcv_ovfl_cnt = 0;
3522 	}
3523 
3524 	if (counter_select & CS_FM_CONFIG_ERRORS)
3525 		write_dev_cntr(dd, C_DC_FM_CFG_ERR, CNTR_INVALID_VL, 0);
3526 
3527 	if (counter_select & CS_LINK_DOWNED)
3528 		write_port_cntr(ppd, C_SW_LINK_DOWN, CNTR_INVALID_VL, 0);
3529 
3530 	if (counter_select & CS_UNCORRECTABLE_ERRORS)
3531 		write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
3532 
3533 	for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
3534 		if (counter_select & CS_PORT_XMIT_DATA)
3535 			write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
3536 
3537 		if (counter_select & CS_PORT_RCV_DATA)
3538 			write_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl), 0);
3539 
3540 		if (counter_select & CS_PORT_XMIT_PKTS)
3541 			write_port_cntr(ppd, C_TX_PKT_VL, idx_from_vl(vl), 0);
3542 
3543 		if (counter_select & CS_PORT_RCV_PKTS)
3544 			write_dev_cntr(dd, C_DC_RX_PKT_VL, idx_from_vl(vl), 0);
3545 
3546 		if (counter_select & CS_PORT_XMIT_WAIT) {
3547 			write_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl), 0);
3548 			ppd->port_vl_xmit_wait_last[idx_from_vl(vl)] = 0;
3549 			ppd->vl_xmit_flit_cnt[idx_from_vl(vl)] = 0;
3550 		}
3551 
3552 		/* sw_port_vl_congestion is 0 for HFIs */
3553 		if (counter_select & CS_PORT_RCV_FECN)
3554 			write_dev_cntr(dd, C_DC_RCV_FCN_VL, idx_from_vl(vl), 0);
3555 
3556 		if (counter_select & CS_PORT_RCV_BECN)
3557 			write_dev_cntr(dd, C_DC_RCV_BCN_VL, idx_from_vl(vl), 0);
3558 
3559 		/* port_vl_xmit_time_cong is 0 for HFIs */
3560 		/* port_vl_xmit_wasted_bw ??? */
3561 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ??? */
3562 		if (counter_select & CS_PORT_RCV_BUBBLE)
3563 			write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
3564 
3565 		/* if (counter_select & CS_PORT_MARK_FECN)
3566 		 *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
3567 		 */
3568 		if (counter_select & C_SW_XMIT_DSCD_VL)
3569 			write_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
3570 					idx_from_vl(vl), 0);
3571 	}
3572 
3573 	if (resp_len)
3574 		*resp_len += sizeof(*req);
3575 
3576 	return reply((struct ib_mad_hdr *)pmp);
3577 }
3578 
3579 static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
3580 				 struct ib_device *ibdev,
3581 				 u32 port, u32 *resp_len)
3582 {
3583 	struct _port_ei *rsp;
3584 	struct opa_port_error_info_msg *req;
3585 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3586 	u64 port_mask;
3587 	u32 num_ports;
3588 	u32 port_num;
3589 	u8 num_pslm;
3590 	u32 error_info_select;
3591 
3592 	req = (struct opa_port_error_info_msg *)pmp->data;
3593 	rsp = &req->port;
3594 
3595 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3596 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3597 
3598 	memset(rsp, 0, sizeof(*rsp));
3599 
3600 	if (num_ports != 1 || num_ports != num_pslm) {
3601 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3602 		return reply((struct ib_mad_hdr *)pmp);
3603 	}
3604 
3605 	/*
3606 	 * The bit set in the mask needs to be consistent with the port
3607 	 * the request came in on.
3608 	 */
3609 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3610 	port_num = find_first_bit((unsigned long *)&port_mask,
3611 				  sizeof(port_mask) * 8);
3612 
3613 	if (port_num != port) {
3614 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3615 		return reply((struct ib_mad_hdr *)pmp);
3616 	}
3617 
3618 	error_info_select = be32_to_cpu(req->error_info_select_mask);
3619 
3620 	/* PortRcvErrorInfo */
3621 	if (error_info_select & ES_PORT_RCV_ERROR_INFO)
3622 		/* turn off status bit */
3623 		dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3624 
3625 	/* ExcessiverBufferOverrunInfo */
3626 	if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
3627 		/*
3628 		 * status bit is essentially kept in the h/w - bit 5 of
3629 		 * RCV_ERR_INFO
3630 		 */
3631 		write_csr(dd, RCV_ERR_INFO,
3632 			  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
3633 
3634 	if (error_info_select & ES_PORT_XMIT_CONSTRAINT_ERROR_INFO)
3635 		dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3636 
3637 	if (error_info_select & ES_PORT_RCV_CONSTRAINT_ERROR_INFO)
3638 		dd->err_info_rcv_constraint.status &= ~OPA_EI_STATUS_SMASK;
3639 
3640 	/* UncorrectableErrorInfo */
3641 	if (error_info_select & ES_UNCORRECTABLE_ERROR_INFO)
3642 		/* turn off status bit */
3643 		dd->err_info_uncorrectable &= ~OPA_EI_STATUS_SMASK;
3644 
3645 	/* FMConfigErrorInfo */
3646 	if (error_info_select & ES_FM_CONFIG_ERROR_INFO)
3647 		/* turn off status bit */
3648 		dd->err_info_fmconfig &= ~OPA_EI_STATUS_SMASK;
3649 
3650 	if (resp_len)
3651 		*resp_len += sizeof(*req);
3652 
3653 	return reply((struct ib_mad_hdr *)pmp);
3654 }
3655 
3656 struct opa_congestion_info_attr {
3657 	__be16 congestion_info;
3658 	u8 control_table_cap;	/* Multiple of 64 entry unit CCTs */
3659 	u8 congestion_log_length;
3660 } __packed;
3661 
3662 static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
3663 				    struct ib_device *ibdev, u32 port,
3664 				    u32 *resp_len, u32 max_len)
3665 {
3666 	struct opa_congestion_info_attr *p =
3667 		(struct opa_congestion_info_attr *)data;
3668 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3669 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3670 
3671 	if (smp_length_check(sizeof(*p), max_len)) {
3672 		smp->status |= IB_SMP_INVALID_FIELD;
3673 		return reply((struct ib_mad_hdr *)smp);
3674 	}
3675 
3676 	p->congestion_info = 0;
3677 	p->control_table_cap = ppd->cc_max_table_entries;
3678 	p->congestion_log_length = OPA_CONG_LOG_ELEMS;
3679 
3680 	if (resp_len)
3681 		*resp_len += sizeof(*p);
3682 
3683 	return reply((struct ib_mad_hdr *)smp);
3684 }
3685 
3686 static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
3687 				       u8 *data, struct ib_device *ibdev,
3688 				       u32 port, u32 *resp_len, u32 max_len)
3689 {
3690 	int i;
3691 	struct opa_congestion_setting_attr *p =
3692 		(struct opa_congestion_setting_attr *)data;
3693 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3694 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3695 	struct opa_congestion_setting_entry_shadow *entries;
3696 	struct cc_state *cc_state;
3697 
3698 	if (smp_length_check(sizeof(*p), max_len)) {
3699 		smp->status |= IB_SMP_INVALID_FIELD;
3700 		return reply((struct ib_mad_hdr *)smp);
3701 	}
3702 
3703 	rcu_read_lock();
3704 
3705 	cc_state = get_cc_state(ppd);
3706 
3707 	if (!cc_state) {
3708 		rcu_read_unlock();
3709 		return reply((struct ib_mad_hdr *)smp);
3710 	}
3711 
3712 	entries = cc_state->cong_setting.entries;
3713 	p->port_control = cpu_to_be16(cc_state->cong_setting.port_control);
3714 	p->control_map = cpu_to_be32(cc_state->cong_setting.control_map);
3715 	for (i = 0; i < OPA_MAX_SLS; i++) {
3716 		p->entries[i].ccti_increase = entries[i].ccti_increase;
3717 		p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
3718 		p->entries[i].trigger_threshold =
3719 			entries[i].trigger_threshold;
3720 		p->entries[i].ccti_min = entries[i].ccti_min;
3721 	}
3722 
3723 	rcu_read_unlock();
3724 
3725 	if (resp_len)
3726 		*resp_len += sizeof(*p);
3727 
3728 	return reply((struct ib_mad_hdr *)smp);
3729 }
3730 
3731 /*
3732  * Apply congestion control information stored in the ppd to the
3733  * active structure.
3734  */
3735 static void apply_cc_state(struct hfi1_pportdata *ppd)
3736 {
3737 	struct cc_state *old_cc_state, *new_cc_state;
3738 
3739 	new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
3740 	if (!new_cc_state)
3741 		return;
3742 
3743 	/*
3744 	 * Hold the lock for updating *and* to prevent ppd information
3745 	 * from changing during the update.
3746 	 */
3747 	spin_lock(&ppd->cc_state_lock);
3748 
3749 	old_cc_state = get_cc_state_protected(ppd);
3750 	if (!old_cc_state) {
3751 		/* never active, or shutting down */
3752 		spin_unlock(&ppd->cc_state_lock);
3753 		kfree(new_cc_state);
3754 		return;
3755 	}
3756 
3757 	*new_cc_state = *old_cc_state;
3758 
3759 	if (ppd->total_cct_entry)
3760 		new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
3761 	else
3762 		new_cc_state->cct.ccti_limit = 0;
3763 
3764 	memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
3765 	       ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
3766 
3767 	new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
3768 	new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
3769 	memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
3770 	       OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
3771 
3772 	rcu_assign_pointer(ppd->cc_state, new_cc_state);
3773 
3774 	spin_unlock(&ppd->cc_state_lock);
3775 
3776 	kfree_rcu(old_cc_state, rcu);
3777 }
3778 
3779 static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
3780 				       struct ib_device *ibdev, u32 port,
3781 				       u32 *resp_len, u32 max_len)
3782 {
3783 	struct opa_congestion_setting_attr *p =
3784 		(struct opa_congestion_setting_attr *)data;
3785 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3786 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3787 	struct opa_congestion_setting_entry_shadow *entries;
3788 	int i;
3789 
3790 	if (smp_length_check(sizeof(*p), max_len)) {
3791 		smp->status |= IB_SMP_INVALID_FIELD;
3792 		return reply((struct ib_mad_hdr *)smp);
3793 	}
3794 
3795 	/*
3796 	 * Save details from packet into the ppd.  Hold the cc_state_lock so
3797 	 * our information is consistent with anyone trying to apply the state.
3798 	 */
3799 	spin_lock(&ppd->cc_state_lock);
3800 	ppd->cc_sl_control_map = be32_to_cpu(p->control_map);
3801 
3802 	entries = ppd->congestion_entries;
3803 	for (i = 0; i < OPA_MAX_SLS; i++) {
3804 		entries[i].ccti_increase = p->entries[i].ccti_increase;
3805 		entries[i].ccti_timer = be16_to_cpu(p->entries[i].ccti_timer);
3806 		entries[i].trigger_threshold =
3807 			p->entries[i].trigger_threshold;
3808 		entries[i].ccti_min = p->entries[i].ccti_min;
3809 	}
3810 	spin_unlock(&ppd->cc_state_lock);
3811 
3812 	/* now apply the information */
3813 	apply_cc_state(ppd);
3814 
3815 	return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
3816 					   resp_len, max_len);
3817 }
3818 
3819 static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
3820 					u8 *data, struct ib_device *ibdev,
3821 					u32 port, u32 *resp_len, u32 max_len)
3822 {
3823 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3824 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3825 	struct opa_hfi1_cong_log *cong_log = (struct opa_hfi1_cong_log *)data;
3826 	u64 ts;
3827 	int i;
3828 
3829 	if (am || smp_length_check(sizeof(*cong_log), max_len)) {
3830 		smp->status |= IB_SMP_INVALID_FIELD;
3831 		return reply((struct ib_mad_hdr *)smp);
3832 	}
3833 
3834 	spin_lock_irq(&ppd->cc_log_lock);
3835 
3836 	cong_log->log_type = OPA_CC_LOG_TYPE_HFI;
3837 	cong_log->congestion_flags = 0;
3838 	cong_log->threshold_event_counter =
3839 		cpu_to_be16(ppd->threshold_event_counter);
3840 	memcpy(cong_log->threshold_cong_event_map,
3841 	       ppd->threshold_cong_event_map,
3842 	       sizeof(cong_log->threshold_cong_event_map));
3843 	/* keep timestamp in units of 1.024 usec */
3844 	ts = ktime_get_ns() / 1024;
3845 	cong_log->current_time_stamp = cpu_to_be32(ts);
3846 	for (i = 0; i < OPA_CONG_LOG_ELEMS; i++) {
3847 		struct opa_hfi1_cong_log_event_internal *cce =
3848 			&ppd->cc_events[ppd->cc_mad_idx++];
3849 		if (ppd->cc_mad_idx == OPA_CONG_LOG_ELEMS)
3850 			ppd->cc_mad_idx = 0;
3851 		/*
3852 		 * Entries which are older than twice the time
3853 		 * required to wrap the counter are supposed to
3854 		 * be zeroed (CA10-49 IBTA, release 1.2.1, V1).
3855 		 */
3856 		if ((ts - cce->timestamp) / 2 > U32_MAX)
3857 			continue;
3858 		memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
3859 		memcpy(cong_log->events[i].remote_qp_number_cn_entry,
3860 		       &cce->rqpn, 3);
3861 		cong_log->events[i].sl_svc_type_cn_entry =
3862 			((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
3863 		cong_log->events[i].remote_lid_cn_entry =
3864 			cpu_to_be32(cce->rlid);
3865 		cong_log->events[i].timestamp_cn_entry =
3866 			cpu_to_be32(cce->timestamp);
3867 	}
3868 
3869 	/*
3870 	 * Reset threshold_cong_event_map, and threshold_event_counter
3871 	 * to 0 when log is read.
3872 	 */
3873 	memset(ppd->threshold_cong_event_map, 0x0,
3874 	       sizeof(ppd->threshold_cong_event_map));
3875 	ppd->threshold_event_counter = 0;
3876 
3877 	spin_unlock_irq(&ppd->cc_log_lock);
3878 
3879 	if (resp_len)
3880 		*resp_len += sizeof(struct opa_hfi1_cong_log);
3881 
3882 	return reply((struct ib_mad_hdr *)smp);
3883 }
3884 
3885 static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3886 				   struct ib_device *ibdev, u32 port,
3887 				   u32 *resp_len, u32 max_len)
3888 {
3889 	struct ib_cc_table_attr *cc_table_attr =
3890 		(struct ib_cc_table_attr *)data;
3891 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3892 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3893 	u32 start_block = OPA_AM_START_BLK(am);
3894 	u32 n_blocks = OPA_AM_NBLK(am);
3895 	struct ib_cc_table_entry_shadow *entries;
3896 	int i, j;
3897 	u32 sentry, eentry;
3898 	struct cc_state *cc_state;
3899 	u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3900 
3901 	/* sanity check n_blocks, start_block */
3902 	if (n_blocks == 0 || smp_length_check(size, max_len) ||
3903 	    start_block + n_blocks > ppd->cc_max_table_entries) {
3904 		smp->status |= IB_SMP_INVALID_FIELD;
3905 		return reply((struct ib_mad_hdr *)smp);
3906 	}
3907 
3908 	rcu_read_lock();
3909 
3910 	cc_state = get_cc_state(ppd);
3911 
3912 	if (!cc_state) {
3913 		rcu_read_unlock();
3914 		return reply((struct ib_mad_hdr *)smp);
3915 	}
3916 
3917 	sentry = start_block * IB_CCT_ENTRIES;
3918 	eentry = sentry + (IB_CCT_ENTRIES * n_blocks);
3919 
3920 	cc_table_attr->ccti_limit = cpu_to_be16(cc_state->cct.ccti_limit);
3921 
3922 	entries = cc_state->cct.entries;
3923 
3924 	/* return n_blocks, though the last block may not be full */
3925 	for (j = 0, i = sentry; i < eentry; j++, i++)
3926 		cc_table_attr->ccti_entries[j].entry =
3927 			cpu_to_be16(entries[i].entry);
3928 
3929 	rcu_read_unlock();
3930 
3931 	if (resp_len)
3932 		*resp_len += size;
3933 
3934 	return reply((struct ib_mad_hdr *)smp);
3935 }
3936 
3937 static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3938 				   struct ib_device *ibdev, u32 port,
3939 				   u32 *resp_len, u32 max_len)
3940 {
3941 	struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
3942 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3943 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3944 	u32 start_block = OPA_AM_START_BLK(am);
3945 	u32 n_blocks = OPA_AM_NBLK(am);
3946 	struct ib_cc_table_entry_shadow *entries;
3947 	int i, j;
3948 	u32 sentry, eentry;
3949 	u16 ccti_limit;
3950 	u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3951 
3952 	/* sanity check n_blocks, start_block */
3953 	if (n_blocks == 0 || smp_length_check(size, max_len) ||
3954 	    start_block + n_blocks > ppd->cc_max_table_entries) {
3955 		smp->status |= IB_SMP_INVALID_FIELD;
3956 		return reply((struct ib_mad_hdr *)smp);
3957 	}
3958 
3959 	sentry = start_block * IB_CCT_ENTRIES;
3960 	eentry = sentry + ((n_blocks - 1) * IB_CCT_ENTRIES) +
3961 		 (be16_to_cpu(p->ccti_limit)) % IB_CCT_ENTRIES + 1;
3962 
3963 	/* sanity check ccti_limit */
3964 	ccti_limit = be16_to_cpu(p->ccti_limit);
3965 	if (ccti_limit + 1 > eentry) {
3966 		smp->status |= IB_SMP_INVALID_FIELD;
3967 		return reply((struct ib_mad_hdr *)smp);
3968 	}
3969 
3970 	/*
3971 	 * Save details from packet into the ppd.  Hold the cc_state_lock so
3972 	 * our information is consistent with anyone trying to apply the state.
3973 	 */
3974 	spin_lock(&ppd->cc_state_lock);
3975 	ppd->total_cct_entry = ccti_limit + 1;
3976 	entries = ppd->ccti_entries;
3977 	for (j = 0, i = sentry; i < eentry; j++, i++)
3978 		entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry);
3979 	spin_unlock(&ppd->cc_state_lock);
3980 
3981 	/* now apply the information */
3982 	apply_cc_state(ppd);
3983 
3984 	return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len,
3985 				       max_len);
3986 }
3987 
/*
 * Payload of the LedInfo attribute as handled by the Get/Set LedInfo
 * handlers in this file.  Both fields are big-endian on the wire.
 */
struct opa_led_info {
	/* LED state; only the bit selected by OPA_LED_MASK is used here */
	__be32 rsvd_led_mask;
	__be32 rsvd;
};

/*
 * Position and mask of the LED (beaconing) bit within rsvd_led_mask,
 * applied after converting the field to CPU byte order.
 */
#define OPA_LED_SHIFT	31
#define OPA_LED_MASK	BIT(OPA_LED_SHIFT)
3995 
3996 static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3997 				   struct ib_device *ibdev, u32 port,
3998 				   u32 *resp_len, u32 max_len)
3999 {
4000 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
4001 	struct hfi1_pportdata *ppd = dd->pport;
4002 	struct opa_led_info *p = (struct opa_led_info *)data;
4003 	u32 nport = OPA_AM_NPORT(am);
4004 	u32 is_beaconing_active;
4005 
4006 	if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
4007 		smp->status |= IB_SMP_INVALID_FIELD;
4008 		return reply((struct ib_mad_hdr *)smp);
4009 	}
4010 
4011 	/*
4012 	 * This pairs with the memory barrier in hfi1_start_led_override to
4013 	 * ensure that we read the correct state of LED beaconing represented
4014 	 * by led_override_timer_active
4015 	 */
4016 	smp_rmb();
4017 	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
4018 	p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
4019 
4020 	if (resp_len)
4021 		*resp_len += sizeof(struct opa_led_info);
4022 
4023 	return reply((struct ib_mad_hdr *)smp);
4024 }
4025 
4026 static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
4027 				   struct ib_device *ibdev, u32 port,
4028 				   u32 *resp_len, u32 max_len)
4029 {
4030 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
4031 	struct opa_led_info *p = (struct opa_led_info *)data;
4032 	u32 nport = OPA_AM_NPORT(am);
4033 	int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
4034 
4035 	if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
4036 		smp->status |= IB_SMP_INVALID_FIELD;
4037 		return reply((struct ib_mad_hdr *)smp);
4038 	}
4039 
4040 	if (on)
4041 		hfi1_start_led_override(dd->pport, 2000, 1500);
4042 	else
4043 		shutdown_led_override(dd->pport);
4044 
4045 	return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len,
4046 				       max_len);
4047 }
4048 
4049 static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
4050 			    u8 *data, struct ib_device *ibdev, u32 port,
4051 			    u32 *resp_len, u32 max_len)
4052 {
4053 	int ret;
4054 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4055 
4056 	switch (attr_id) {
4057 	case IB_SMP_ATTR_NODE_DESC:
4058 		ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
4059 					      resp_len, max_len);
4060 		break;
4061 	case IB_SMP_ATTR_NODE_INFO:
4062 		ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
4063 					      resp_len, max_len);
4064 		break;
4065 	case IB_SMP_ATTR_PORT_INFO:
4066 		ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
4067 					      resp_len, max_len);
4068 		break;
4069 	case IB_SMP_ATTR_PKEY_TABLE:
4070 		ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
4071 					       resp_len, max_len);
4072 		break;
4073 	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
4074 		ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
4075 					      resp_len, max_len);
4076 		break;
4077 	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
4078 		ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
4079 					      resp_len, max_len);
4080 		break;
4081 	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
4082 		ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
4083 					       resp_len, max_len);
4084 		break;
4085 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
4086 		ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
4087 						resp_len, max_len);
4088 		break;
4089 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
4090 		ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
4091 					 resp_len, max_len);
4092 		break;
4093 	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
4094 		ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
4095 					 resp_len, max_len);
4096 		break;
4097 	case OPA_ATTRIB_ID_CABLE_INFO:
4098 		ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
4099 						resp_len, max_len);
4100 		break;
4101 	case IB_SMP_ATTR_VL_ARB_TABLE:
4102 		ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
4103 					    resp_len, max_len);
4104 		break;
4105 	case OPA_ATTRIB_ID_CONGESTION_INFO:
4106 		ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
4107 					       resp_len, max_len);
4108 		break;
4109 	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
4110 		ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
4111 						  port, resp_len, max_len);
4112 		break;
4113 	case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
4114 		ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
4115 						   port, resp_len, max_len);
4116 		break;
4117 	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
4118 		ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
4119 					      resp_len, max_len);
4120 		break;
4121 	case IB_SMP_ATTR_LED_INFO:
4122 		ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
4123 					      resp_len, max_len);
4124 		break;
4125 	case IB_SMP_ATTR_SM_INFO:
4126 		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
4127 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
4128 		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
4129 			return IB_MAD_RESULT_SUCCESS;
4130 		fallthrough;
4131 	default:
4132 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
4133 		ret = reply((struct ib_mad_hdr *)smp);
4134 		break;
4135 	}
4136 	return ret;
4137 }
4138 
4139 static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
4140 			    u8 *data, struct ib_device *ibdev, u32 port,
4141 			    u32 *resp_len, u32 max_len, int local_mad)
4142 {
4143 	int ret;
4144 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4145 
4146 	switch (attr_id) {
4147 	case IB_SMP_ATTR_PORT_INFO:
4148 		ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
4149 					      resp_len, max_len, local_mad);
4150 		break;
4151 	case IB_SMP_ATTR_PKEY_TABLE:
4152 		ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
4153 					       resp_len, max_len);
4154 		break;
4155 	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
4156 		ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
4157 					      resp_len, max_len);
4158 		break;
4159 	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
4160 		ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
4161 					      resp_len, max_len);
4162 		break;
4163 	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
4164 		ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
4165 					       resp_len, max_len);
4166 		break;
4167 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
4168 		ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
4169 						resp_len, max_len);
4170 		break;
4171 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
4172 		ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
4173 					 resp_len, max_len, local_mad);
4174 		break;
4175 	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
4176 		ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
4177 					 resp_len, max_len);
4178 		break;
4179 	case IB_SMP_ATTR_VL_ARB_TABLE:
4180 		ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
4181 					    resp_len, max_len);
4182 		break;
4183 	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
4184 		ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
4185 						  port, resp_len, max_len);
4186 		break;
4187 	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
4188 		ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
4189 					      resp_len, max_len);
4190 		break;
4191 	case IB_SMP_ATTR_LED_INFO:
4192 		ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
4193 					      resp_len, max_len);
4194 		break;
4195 	case IB_SMP_ATTR_SM_INFO:
4196 		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
4197 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
4198 		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
4199 			return IB_MAD_RESULT_SUCCESS;
4200 		fallthrough;
4201 	default:
4202 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
4203 		ret = reply((struct ib_mad_hdr *)smp);
4204 		break;
4205 	}
4206 	return ret;
4207 }
4208 
4209 static inline void set_aggr_error(struct opa_aggregate *ag)
4210 {
4211 	ag->err_reqlength |= cpu_to_be16(0x8000);
4212 }
4213 
4214 static int subn_get_opa_aggregate(struct opa_smp *smp,
4215 				  struct ib_device *ibdev, u32 port,
4216 				  u32 *resp_len)
4217 {
4218 	int i;
4219 	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
4220 	u8 *next_smp = opa_get_smp_data(smp);
4221 
4222 	if (num_attr < 1 || num_attr > 117) {
4223 		smp->status |= IB_SMP_INVALID_FIELD;
4224 		return reply((struct ib_mad_hdr *)smp);
4225 	}
4226 
4227 	for (i = 0; i < num_attr; i++) {
4228 		struct opa_aggregate *agg;
4229 		size_t agg_data_len;
4230 		size_t agg_size;
4231 		u32 am;
4232 
4233 		agg = (struct opa_aggregate *)next_smp;
4234 		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
4235 		agg_size = sizeof(*agg) + agg_data_len;
4236 		am = be32_to_cpu(agg->attr_mod);
4237 
4238 		*resp_len += agg_size;
4239 
4240 		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
4241 			smp->status |= IB_SMP_INVALID_FIELD;
4242 			return reply((struct ib_mad_hdr *)smp);
4243 		}
4244 
4245 		/* zero the payload for this segment */
4246 		memset(next_smp + sizeof(*agg), 0, agg_data_len);
4247 
4248 		(void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
4249 				       ibdev, port, NULL, (u32)agg_data_len);
4250 
4251 		if (smp->status & IB_SMP_INVALID_FIELD)
4252 			break;
4253 		if (smp->status & ~IB_SMP_DIRECTION) {
4254 			set_aggr_error(agg);
4255 			return reply((struct ib_mad_hdr *)smp);
4256 		}
4257 		next_smp += agg_size;
4258 	}
4259 
4260 	return reply((struct ib_mad_hdr *)smp);
4261 }
4262 
4263 static int subn_set_opa_aggregate(struct opa_smp *smp,
4264 				  struct ib_device *ibdev, u32 port,
4265 				  u32 *resp_len, int local_mad)
4266 {
4267 	int i;
4268 	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
4269 	u8 *next_smp = opa_get_smp_data(smp);
4270 
4271 	if (num_attr < 1 || num_attr > 117) {
4272 		smp->status |= IB_SMP_INVALID_FIELD;
4273 		return reply((struct ib_mad_hdr *)smp);
4274 	}
4275 
4276 	for (i = 0; i < num_attr; i++) {
4277 		struct opa_aggregate *agg;
4278 		size_t agg_data_len;
4279 		size_t agg_size;
4280 		u32 am;
4281 
4282 		agg = (struct opa_aggregate *)next_smp;
4283 		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
4284 		agg_size = sizeof(*agg) + agg_data_len;
4285 		am = be32_to_cpu(agg->attr_mod);
4286 
4287 		*resp_len += agg_size;
4288 
4289 		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
4290 			smp->status |= IB_SMP_INVALID_FIELD;
4291 			return reply((struct ib_mad_hdr *)smp);
4292 		}
4293 
4294 		(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
4295 				       ibdev, port, NULL, (u32)agg_data_len,
4296 				       local_mad);
4297 
4298 		if (smp->status & IB_SMP_INVALID_FIELD)
4299 			break;
4300 		if (smp->status & ~IB_SMP_DIRECTION) {
4301 			set_aggr_error(agg);
4302 			return reply((struct ib_mad_hdr *)smp);
4303 		}
4304 		next_smp += agg_size;
4305 	}
4306 
4307 	return reply((struct ib_mad_hdr *)smp);
4308 }
4309 
4310 /*
4311  * OPAv1 specifies that, on the transition to link up, these counters
4312  * are cleared:
4313  *   PortRcvErrors [*]
4314  *   LinkErrorRecovery
4315  *   LocalLinkIntegrityErrors
4316  *   ExcessiveBufferOverruns [*]
4317  *
4318  * [*] Error info associated with these counters is retained, but the
4319  * error info status is reset to 0.
4320  */
4321 void clear_linkup_counters(struct hfi1_devdata *dd)
4322 {
4323 	/* PortRcvErrors */
4324 	write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
4325 	dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
4326 	/* LinkErrorRecovery */
4327 	write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
4328 	write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0);
4329 	/* LocalLinkIntegrityErrors */
4330 	write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
4331 	/* ExcessiveBufferOverruns */
4332 	write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
4333 	dd->rcv_ovfl_cnt = 0;
4334 	dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
4335 }
4336 
4337 static int is_full_mgmt_pkey_in_table(struct hfi1_ibport *ibp)
4338 {
4339 	unsigned int i;
4340 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
4341 
4342 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); ++i)
4343 		if (ppd->pkeys[i] == FULL_MGMT_P_KEY)
4344 			return 1;
4345 
4346 	return 0;
4347 }
4348 
4349 /*
4350  * is_local_mad() returns 1 if 'mad' is sent from, and destined to the
4351  * local node, 0 otherwise.
4352  */
4353 static int is_local_mad(struct hfi1_ibport *ibp, const struct opa_mad *mad,
4354 			const struct ib_wc *in_wc)
4355 {
4356 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
4357 	const struct opa_smp *smp = (const struct opa_smp *)mad;
4358 
4359 	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
4360 		return (smp->hop_cnt == 0 &&
4361 			smp->route.dr.dr_slid == OPA_LID_PERMISSIVE &&
4362 			smp->route.dr.dr_dlid == OPA_LID_PERMISSIVE);
4363 	}
4364 
4365 	return (in_wc->slid == ppd->lid);
4366 }
4367 
4368 /*
4369  * opa_local_smp_check() should only be called on MADs for which
4370  * is_local_mad() returns true. It applies the SMP checks that are
4371  * specific to SMPs which are sent from, and destined to this node.
4372  * opa_local_smp_check() returns 0 if the SMP passes its checks, 1
4373  * otherwise.
4374  *
4375  * SMPs which arrive from other nodes are instead checked by
4376  * opa_smp_check().
4377  */
4378 static int opa_local_smp_check(struct hfi1_ibport *ibp,
4379 			       const struct ib_wc *in_wc)
4380 {
4381 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
4382 	u16 pkey;
4383 
4384 	if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys))
4385 		return 1;
4386 
4387 	pkey = ppd->pkeys[in_wc->pkey_index];
4388 	/*
4389 	 * We need to do the "node-local" checks specified in OPAv1,
4390 	 * rev 0.90, section 9.10.26, which are:
4391 	 *   - pkey is 0x7fff, or 0xffff
4392 	 *   - Source QPN == 0 || Destination QPN == 0
4393 	 *   - the MAD header's management class is either
4394 	 *     IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE or
4395 	 *     IB_MGMT_CLASS_SUBN_LID_ROUTED
4396 	 *   - SLID != 0
4397 	 *
4398 	 * However, we know (and so don't need to check again) that,
4399 	 * for local SMPs, the MAD stack passes MADs with:
4400 	 *   - Source QPN of 0
4401 	 *   - MAD mgmt_class is IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4402 	 *   - SLID is either: OPA_LID_PERMISSIVE (0xFFFFFFFF), or
4403 	 *     our own port's lid
4404 	 *
4405 	 */
4406 	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
4407 		return 0;
4408 	ingress_pkey_table_fail(ppd, pkey, in_wc->slid);
4409 	return 1;
4410 }
4411 
4412 /**
4413  * hfi1_pkey_validation_pma - It validates PKEYs for incoming PMA MAD packets.
4414  * @ibp: IB port data
4415  * @in_mad: MAD packet with header and data
4416  * @in_wc: Work completion data such as source LID, port number, etc.
4417  *
4418  * These are all the possible logic rules for validating a pkey:
4419  *
4420  * a) If pkey neither FULL_MGMT_P_KEY nor LIM_MGMT_P_KEY,
4421  *    and NOT self-originated packet:
4422  *     Drop MAD packet as it should always be part of the
4423  *     management partition unless it's a self-originated packet.
4424  *
4425  * b) If pkey_index -> FULL_MGMT_P_KEY, and LIM_MGMT_P_KEY in pkey table:
4426  *     The packet is coming from a management node and the receiving node
4427  *     is also a management node, so it is safe for the packet to go through.
4428  *
4429  * c) If pkey_index -> FULL_MGMT_P_KEY, and LIM_MGMT_P_KEY is NOT in pkey table:
4430  *     Drop the packet as LIM_MGMT_P_KEY should always be in the pkey table.
4431  *     It could be an FM misconfiguration.
4432  *
4433  * d) If pkey_index -> LIM_MGMT_P_KEY and FULL_MGMT_P_KEY is NOT in pkey table:
4434  *     It is safe for the packet to go through since a non-management node is
4435  *     talking to another non-management node.
4436  *
4437  * e) If pkey_index -> LIM_MGMT_P_KEY and FULL_MGMT_P_KEY in pkey table:
4438  *     Drop the packet because a non-management node is talking to a
4439  *     management node, and it could be an attack.
4440  *
4441  * For the implementation, these rules can be simplied to only checking
4442  * for (a) and (e). There's no need to check for rule (b) as
4443  * the packet doesn't need to be dropped. Rule (c) is not possible in
4444  * the driver as LIM_MGMT_P_KEY is always in the pkey table.
4445  *
4446  * Return:
4447  * 0 - pkey is okay, -EINVAL it's a bad pkey
4448  */
4449 static int hfi1_pkey_validation_pma(struct hfi1_ibport *ibp,
4450 				    const struct opa_mad *in_mad,
4451 				    const struct ib_wc *in_wc)
4452 {
4453 	u16 pkey_value = hfi1_lookup_pkey_value(ibp, in_wc->pkey_index);
4454 
4455 	/* Rule (a) from above */
4456 	if (!is_local_mad(ibp, in_mad, in_wc) &&
4457 	    pkey_value != LIM_MGMT_P_KEY &&
4458 	    pkey_value != FULL_MGMT_P_KEY)
4459 		return -EINVAL;
4460 
4461 	/* Rule (e) from above */
4462 	if (pkey_value == LIM_MGMT_P_KEY &&
4463 	    is_full_mgmt_pkey_in_table(ibp))
4464 		return -EINVAL;
4465 
4466 	return 0;
4467 }
4468 
4469 static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
4470 			    u32 port, const struct opa_mad *in_mad,
4471 			    struct opa_mad *out_mad,
4472 			    u32 *resp_len, int local_mad)
4473 {
4474 	struct opa_smp *smp = (struct opa_smp *)out_mad;
4475 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4476 	u8 *data;
4477 	u32 am, data_size;
4478 	__be16 attr_id;
4479 	int ret;
4480 
4481 	*out_mad = *in_mad;
4482 	data = opa_get_smp_data(smp);
4483 	data_size = (u32)opa_get_smp_data_size(smp);
4484 
4485 	am = be32_to_cpu(smp->attr_mod);
4486 	attr_id = smp->attr_id;
4487 	if (smp->class_version != OPA_SM_CLASS_VERSION) {
4488 		smp->status |= IB_SMP_UNSUP_VERSION;
4489 		ret = reply((struct ib_mad_hdr *)smp);
4490 		return ret;
4491 	}
4492 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
4493 			 smp->route.dr.dr_slid, smp->route.dr.return_path,
4494 			 smp->hop_cnt);
4495 	if (ret) {
4496 		u32 port_num = be32_to_cpu(smp->attr_mod);
4497 
4498 		/*
4499 		 * If this is a get/set portinfo, we already check the
4500 		 * M_Key if the MAD is for another port and the M_Key
4501 		 * is OK on the receiving port. This check is needed
4502 		 * to increment the error counters when the M_Key
4503 		 * fails to match on *both* ports.
4504 		 */
4505 		if (attr_id == IB_SMP_ATTR_PORT_INFO &&
4506 		    (smp->method == IB_MGMT_METHOD_GET ||
4507 		     smp->method == IB_MGMT_METHOD_SET) &&
4508 		    port_num && port_num <= ibdev->phys_port_cnt &&
4509 		    port != port_num)
4510 			(void)check_mkey(to_iport(ibdev, port_num),
4511 					  (struct ib_mad_hdr *)smp, 0,
4512 					  smp->mkey, smp->route.dr.dr_slid,
4513 					  smp->route.dr.return_path,
4514 					  smp->hop_cnt);
4515 		ret = IB_MAD_RESULT_FAILURE;
4516 		return ret;
4517 	}
4518 
4519 	*resp_len = opa_get_smp_header_size(smp);
4520 
4521 	switch (smp->method) {
4522 	case IB_MGMT_METHOD_GET:
4523 		switch (attr_id) {
4524 		default:
4525 			clear_opa_smp_data(smp);
4526 			ret = subn_get_opa_sma(attr_id, smp, am, data,
4527 					       ibdev, port, resp_len,
4528 					       data_size);
4529 			break;
4530 		case OPA_ATTRIB_ID_AGGREGATE:
4531 			ret = subn_get_opa_aggregate(smp, ibdev, port,
4532 						     resp_len);
4533 			break;
4534 		}
4535 		break;
4536 	case IB_MGMT_METHOD_SET:
4537 		switch (attr_id) {
4538 		default:
4539 			ret = subn_set_opa_sma(attr_id, smp, am, data,
4540 					       ibdev, port, resp_len,
4541 					       data_size, local_mad);
4542 			break;
4543 		case OPA_ATTRIB_ID_AGGREGATE:
4544 			ret = subn_set_opa_aggregate(smp, ibdev, port,
4545 						     resp_len, local_mad);
4546 			break;
4547 		}
4548 		break;
4549 	case IB_MGMT_METHOD_TRAP:
4550 	case IB_MGMT_METHOD_REPORT:
4551 	case IB_MGMT_METHOD_REPORT_RESP:
4552 	case IB_MGMT_METHOD_GET_RESP:
4553 		/*
4554 		 * The ib_mad module will call us to process responses
4555 		 * before checking for other consumers.
4556 		 * Just tell the caller to process it normally.
4557 		 */
4558 		ret = IB_MAD_RESULT_SUCCESS;
4559 		break;
4560 	case IB_MGMT_METHOD_TRAP_REPRESS:
4561 		subn_handle_opa_trap_repress(ibp, smp);
4562 		/* Always successful */
4563 		ret = IB_MAD_RESULT_SUCCESS;
4564 		break;
4565 	default:
4566 		smp->status |= IB_SMP_UNSUP_METHOD;
4567 		ret = reply((struct ib_mad_hdr *)smp);
4568 		break;
4569 	}
4570 
4571 	return ret;
4572 }
4573 
4574 static int process_subn(struct ib_device *ibdev, int mad_flags,
4575 			u32 port, const struct ib_mad *in_mad,
4576 			struct ib_mad *out_mad)
4577 {
4578 	struct ib_smp *smp = (struct ib_smp *)out_mad;
4579 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4580 	int ret;
4581 
4582 	*out_mad = *in_mad;
4583 	if (smp->class_version != 1) {
4584 		smp->status |= IB_SMP_UNSUP_VERSION;
4585 		ret = reply((struct ib_mad_hdr *)smp);
4586 		return ret;
4587 	}
4588 
4589 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
4590 			 smp->mkey, (__force __be32)smp->dr_slid,
4591 			 smp->return_path, smp->hop_cnt);
4592 	if (ret) {
4593 		u32 port_num = be32_to_cpu(smp->attr_mod);
4594 
4595 		/*
4596 		 * If this is a get/set portinfo, we already check the
4597 		 * M_Key if the MAD is for another port and the M_Key
4598 		 * is OK on the receiving port. This check is needed
4599 		 * to increment the error counters when the M_Key
4600 		 * fails to match on *both* ports.
4601 		 */
4602 		if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
4603 		    (smp->method == IB_MGMT_METHOD_GET ||
4604 		     smp->method == IB_MGMT_METHOD_SET) &&
4605 		    port_num && port_num <= ibdev->phys_port_cnt &&
4606 		    port != port_num)
4607 			(void)check_mkey(to_iport(ibdev, port_num),
4608 					 (struct ib_mad_hdr *)smp, 0,
4609 					 smp->mkey,
4610 					 (__force __be32)smp->dr_slid,
4611 					 smp->return_path, smp->hop_cnt);
4612 		ret = IB_MAD_RESULT_FAILURE;
4613 		return ret;
4614 	}
4615 
4616 	switch (smp->method) {
4617 	case IB_MGMT_METHOD_GET:
4618 		switch (smp->attr_id) {
4619 		case IB_SMP_ATTR_NODE_INFO:
4620 			ret = subn_get_nodeinfo(smp, ibdev, port);
4621 			break;
4622 		default:
4623 			smp->status |= IB_SMP_UNSUP_METH_ATTR;
4624 			ret = reply((struct ib_mad_hdr *)smp);
4625 			break;
4626 		}
4627 		break;
4628 	}
4629 
4630 	return ret;
4631 }
4632 
4633 static int process_perf(struct ib_device *ibdev, u32 port,
4634 			const struct ib_mad *in_mad,
4635 			struct ib_mad *out_mad)
4636 {
4637 	struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
4638 	struct ib_class_port_info *cpi = (struct ib_class_port_info *)
4639 						&pmp->data;
4640 	int ret = IB_MAD_RESULT_FAILURE;
4641 
4642 	*out_mad = *in_mad;
4643 	if (pmp->mad_hdr.class_version != 1) {
4644 		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4645 		ret = reply((struct ib_mad_hdr *)pmp);
4646 		return ret;
4647 	}
4648 
4649 	switch (pmp->mad_hdr.method) {
4650 	case IB_MGMT_METHOD_GET:
4651 		switch (pmp->mad_hdr.attr_id) {
4652 		case IB_PMA_PORT_COUNTERS:
4653 			ret = pma_get_ib_portcounters(pmp, ibdev, port);
4654 			break;
4655 		case IB_PMA_PORT_COUNTERS_EXT:
4656 			ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
4657 			break;
4658 		case IB_PMA_CLASS_PORT_INFO:
4659 			cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
4660 			ret = reply((struct ib_mad_hdr *)pmp);
4661 			break;
4662 		default:
4663 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4664 			ret = reply((struct ib_mad_hdr *)pmp);
4665 			break;
4666 		}
4667 		break;
4668 
4669 	case IB_MGMT_METHOD_SET:
4670 		if (pmp->mad_hdr.attr_id) {
4671 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4672 			ret = reply((struct ib_mad_hdr *)pmp);
4673 		}
4674 		break;
4675 
4676 	case IB_MGMT_METHOD_TRAP:
4677 	case IB_MGMT_METHOD_GET_RESP:
4678 		/*
4679 		 * The ib_mad module will call us to process responses
4680 		 * before checking for other consumers.
4681 		 * Just tell the caller to process it normally.
4682 		 */
4683 		ret = IB_MAD_RESULT_SUCCESS;
4684 		break;
4685 
4686 	default:
4687 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4688 		ret = reply((struct ib_mad_hdr *)pmp);
4689 		break;
4690 	}
4691 
4692 	return ret;
4693 }
4694 
4695 static int process_perf_opa(struct ib_device *ibdev, u32 port,
4696 			    const struct opa_mad *in_mad,
4697 			    struct opa_mad *out_mad, u32 *resp_len)
4698 {
4699 	struct opa_pma_mad *pmp = (struct opa_pma_mad *)out_mad;
4700 	int ret;
4701 
4702 	*out_mad = *in_mad;
4703 
4704 	if (pmp->mad_hdr.class_version != OPA_SM_CLASS_VERSION) {
4705 		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4706 		return reply((struct ib_mad_hdr *)pmp);
4707 	}
4708 
4709 	*resp_len = sizeof(pmp->mad_hdr);
4710 
4711 	switch (pmp->mad_hdr.method) {
4712 	case IB_MGMT_METHOD_GET:
4713 		switch (pmp->mad_hdr.attr_id) {
4714 		case IB_PMA_CLASS_PORT_INFO:
4715 			ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
4716 			break;
4717 		case OPA_PM_ATTRIB_ID_PORT_STATUS:
4718 			ret = pma_get_opa_portstatus(pmp, ibdev, port,
4719 						     resp_len);
4720 			break;
4721 		case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
4722 			ret = pma_get_opa_datacounters(pmp, ibdev, port,
4723 						       resp_len);
4724 			break;
4725 		case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
4726 			ret = pma_get_opa_porterrors(pmp, ibdev, port,
4727 						     resp_len);
4728 			break;
4729 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
4730 			ret = pma_get_opa_errorinfo(pmp, ibdev, port,
4731 						    resp_len);
4732 			break;
4733 		default:
4734 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4735 			ret = reply((struct ib_mad_hdr *)pmp);
4736 			break;
4737 		}
4738 		break;
4739 
4740 	case IB_MGMT_METHOD_SET:
4741 		switch (pmp->mad_hdr.attr_id) {
4742 		case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
4743 			ret = pma_set_opa_portstatus(pmp, ibdev, port,
4744 						     resp_len);
4745 			break;
4746 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
4747 			ret = pma_set_opa_errorinfo(pmp, ibdev, port,
4748 						    resp_len);
4749 			break;
4750 		default:
4751 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4752 			ret = reply((struct ib_mad_hdr *)pmp);
4753 			break;
4754 		}
4755 		break;
4756 
4757 	case IB_MGMT_METHOD_TRAP:
4758 	case IB_MGMT_METHOD_GET_RESP:
4759 		/*
4760 		 * The ib_mad module will call us to process responses
4761 		 * before checking for other consumers.
4762 		 * Just tell the caller to process it normally.
4763 		 */
4764 		ret = IB_MAD_RESULT_SUCCESS;
4765 		break;
4766 
4767 	default:
4768 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4769 		ret = reply((struct ib_mad_hdr *)pmp);
4770 		break;
4771 	}
4772 
4773 	return ret;
4774 }
4775 
4776 static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
4777 				u32 port, const struct ib_wc *in_wc,
4778 				const struct ib_grh *in_grh,
4779 				const struct opa_mad *in_mad,
4780 				struct opa_mad *out_mad, size_t *out_mad_size,
4781 				u16 *out_mad_pkey_index)
4782 {
4783 	int ret;
4784 	int pkey_idx;
4785 	int local_mad = 0;
4786 	u32 resp_len = in_wc->byte_len - sizeof(*in_grh);
4787 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4788 
4789 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
4790 	if (pkey_idx < 0) {
4791 		pr_warn("failed to find limited mgmt pkey, defaulting 0x%x\n",
4792 			hfi1_get_pkey(ibp, 1));
4793 		pkey_idx = 1;
4794 	}
4795 	*out_mad_pkey_index = (u16)pkey_idx;
4796 
4797 	switch (in_mad->mad_hdr.mgmt_class) {
4798 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4799 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4800 		local_mad = is_local_mad(ibp, in_mad, in_wc);
4801 		if (local_mad) {
4802 			ret = opa_local_smp_check(ibp, in_wc);
4803 			if (ret)
4804 				return IB_MAD_RESULT_FAILURE;
4805 		}
4806 		ret = process_subn_opa(ibdev, mad_flags, port, in_mad,
4807 				       out_mad, &resp_len, local_mad);
4808 		goto bail;
4809 	case IB_MGMT_CLASS_PERF_MGMT:
4810 		ret = hfi1_pkey_validation_pma(ibp, in_mad, in_wc);
4811 		if (ret)
4812 			return IB_MAD_RESULT_FAILURE;
4813 
4814 		ret = process_perf_opa(ibdev, port, in_mad, out_mad, &resp_len);
4815 		goto bail;
4816 
4817 	default:
4818 		ret = IB_MAD_RESULT_SUCCESS;
4819 	}
4820 
4821 bail:
4822 	if (ret & IB_MAD_RESULT_REPLY)
4823 		*out_mad_size = round_up(resp_len, 8);
4824 	else if (ret & IB_MAD_RESULT_SUCCESS)
4825 		*out_mad_size = in_wc->byte_len - sizeof(struct ib_grh);
4826 
4827 	return ret;
4828 }
4829 
4830 static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u32 port,
4831 			       const struct ib_wc *in_wc,
4832 			       const struct ib_grh *in_grh,
4833 			       const struct ib_mad *in_mad,
4834 			       struct ib_mad *out_mad)
4835 {
4836 	int ret;
4837 
4838 	switch (in_mad->mad_hdr.mgmt_class) {
4839 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4840 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4841 		ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
4842 		break;
4843 	case IB_MGMT_CLASS_PERF_MGMT:
4844 		ret = process_perf(ibdev, port, in_mad, out_mad);
4845 		break;
4846 	default:
4847 		ret = IB_MAD_RESULT_SUCCESS;
4848 		break;
4849 	}
4850 
4851 	return ret;
4852 }
4853 
4854 /**
4855  * hfi1_process_mad - process an incoming MAD packet
4856  * @ibdev: the infiniband device this packet came in on
4857  * @mad_flags: MAD flags
4858  * @port: the port number this packet came in on
4859  * @in_wc: the work completion entry for this packet
4860  * @in_grh: the global route header for this packet
4861  * @in_mad: the incoming MAD
4862  * @out_mad: any outgoing MAD reply
4863  * @out_mad_size: size of the outgoing MAD reply
4864  * @out_mad_pkey_index: used to apss back the packet key index
4865  *
4866  * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
4867  * interested in processing.
4868  *
4869  * Note that the verbs framework has already done the MAD sanity checks,
4870  * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4871  * MADs.
4872  *
4873  * This is called by the ib_mad module.
4874  */
4875 int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u32 port,
4876 		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
4877 		     const struct ib_mad *in_mad, struct ib_mad *out_mad,
4878 		     size_t *out_mad_size, u16 *out_mad_pkey_index)
4879 {
4880 	switch (in_mad->mad_hdr.base_version) {
4881 	case OPA_MGMT_BASE_VERSION:
4882 		return hfi1_process_opa_mad(ibdev, mad_flags, port,
4883 					    in_wc, in_grh,
4884 					    (struct opa_mad *)in_mad,
4885 					    (struct opa_mad *)out_mad,
4886 					    out_mad_size,
4887 					    out_mad_pkey_index);
4888 	case IB_MGMT_BASE_VERSION:
4889 		return hfi1_process_ib_mad(ibdev, mad_flags, port, in_wc,
4890 					   in_grh, in_mad, out_mad);
4891 	default:
4892 		break;
4893 	}
4894 
4895 	return IB_MAD_RESULT_FAILURE;
4896 }
4897