xref: /openbmc/linux/drivers/infiniband/hw/hfi1/mad.c (revision 95db3b25)
1 /*
2  * Copyright(c) 2015, 2016 Intel Corporation.
3  *
4  * This file is provided under a dual BSD/GPLv2 license.  When using or
5  * redistributing this file, you may do so under either license.
6  *
7  * GPL LICENSE SUMMARY
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of version 2 of the GNU General Public License as
11  * published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * BSD LICENSE
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  *
24  *  - Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  *  - Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in
28  *    the documentation and/or other materials provided with the
29  *    distribution.
30  *  - Neither the name of Intel Corporation nor the names of its
31  *    contributors may be used to endorse or promote products derived
32  *    from this software without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  *
46  */
47 
48 #include <linux/net.h>
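/*
 * Number of OPA_PARTITION_TABLE_BLK_SIZE-entry pkey blocks that fit in the
 * data payload of a single directed-route SMP.
 */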
49 #define OPA_NUM_PKEY_BLOCKS_PER_SMP (OPA_SMP_DR_DATA_SIZE \
50 			/ (OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16)))
51 
52 #include "hfi.h"
53 #include "mad.h"
54 #include "trace.h"
55 #include "qp.h"
56 
57 /* the reset value from the FM is supposed to be 0xffff; handle both the old (0x0fff) and current values */
58 #define OPA_LINK_WIDTH_RESET_OLD 0x0fff
59 #define OPA_LINK_WIDTH_RESET 0xffff
60 
61 static int reply(struct ib_mad_hdr *smp)
62 {
63 	/*
64 	 * The verbs framework will handle the directed/LID route
65 	 * packet changes.
66 	 */
67 	smp->method = IB_MGMT_METHOD_GET_RESP;
68 	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
69 		smp->status |= IB_SMP_DIRECTION;
70 	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
71 }
72 
73 static inline void clear_opa_smp_data(struct opa_smp *smp)
74 {
75 	void *data = opa_get_smp_data(smp);
76 	size_t size = opa_get_smp_data_size(smp);
77 
78 	memset(data, 0, size);
79 }
80 
81 void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port)
82 {
83 	struct ib_event event;
84 
85 	event.event = IB_EVENT_PKEY_CHANGE;
86 	event.device = &dd->verbs_dev.rdi.ibdev;
87 	event.element.port_num = port;
88 	ib_dispatch_event(&event);
89 }
90 
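/*
 * Build and post a trap/notice MAD addressed to the SM (ppd->sm_trap_qp).
 * Traps are suppressed while the port is not ACTIVE (o14-3.2.1) and are
 * rate-limited via ibp->rvp.trap_timeout (o14-2).
 */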
91 static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
92 {
93 	struct ib_mad_send_buf *send_buf;
94 	struct ib_mad_agent *agent;
95 	struct opa_smp *smp;
96 	int ret;
97 	unsigned long flags;
98 	unsigned long timeout;
99 	int pkey_idx;
100 	u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;
101 
102 	agent = ibp->rvp.send_agent;
103 	if (!agent)
104 		return;
105 
106 	/* o14-3.2.1 */
107 	if (ppd_from_ibp(ibp)->lstate != IB_PORT_ACTIVE)
108 		return;
109 
110 	/* o14-2 */
111 	if (ibp->rvp.trap_timeout && time_before(jiffies,
112 						 ibp->rvp.trap_timeout))
113 		return;
114 
115 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
116 	if (pkey_idx < 0) {
117 		pr_warn("%s: failed to find limited mgmt pkey, defaulting 0x%x\n",
118 			__func__, hfi1_get_pkey(ibp, 1));
119 		pkey_idx = 1;
120 	}
121 
122 	send_buf = ib_create_send_mad(agent, qpn, pkey_idx, 0,
123 				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
124 				      GFP_ATOMIC, IB_MGMT_BASE_VERSION);
125 	if (IS_ERR(send_buf))
126 		return;
127 
128 	smp = send_buf->mad;
129 	smp->base_version = OPA_MGMT_BASE_VERSION;
130 	smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
131 	smp->class_version = OPA_SMI_CLASS_VERSION;
132 	smp->method = IB_MGMT_METHOD_TRAP;
133 	ibp->rvp.tid++;
134 	smp->tid = cpu_to_be64(ibp->rvp.tid);
135 	smp->attr_id = IB_SMP_ATTR_NOTICE;
136 	/* o14-1: smp->mkey = 0; */
137 	memcpy(smp->route.lid.data, data, len);
138 
139 	spin_lock_irqsave(&ibp->rvp.lock, flags);
140 	if (!ibp->rvp.sm_ah) {
141 		if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
142 			struct ib_ah *ah;
143 
144 			ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
145 			if (IS_ERR(ah)) {
146 				ret = PTR_ERR(ah);
147 			} else {
148 				send_buf->ah = ah;
149 				ibp->rvp.sm_ah = ibah_to_rvtah(ah);
150 				ret = 0;
151 			}
152 		} else {
153 			ret = -EINVAL;
154 		}
155 	} else {
156 		send_buf->ah = &ibp->rvp.sm_ah->ibah;
157 		ret = 0;
158 	}
159 	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
160 
161 	if (!ret)
162 		ret = ib_post_send_mad(send_buf, NULL);
163 	if (!ret) {
164 		/* rate-limit: wait 4.096 usec * 2^subnet_timeout before the next trap */
165 		timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
166 		ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
167 	} else {
168 		ib_free_send_mad(send_buf);
169 		ibp->rvp.trap_timeout = 0;
170 	}
171 }
172 
173 /*
174  * Send a bad [PQ]_Key trap (ch. 14.3.8).
175  */
176 void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
177 		    u32 qp1, u32 qp2, u16 lid1, u16 lid2)
178 {
179 	struct opa_mad_notice_attr data;
180 	u32 lid = ppd_from_ibp(ibp)->lid;
181 	u32 _lid1 = lid1;
182 	u32 _lid2 = lid2;
183 
184 	memset(&data, 0, sizeof(data));
185 
186 	if (trap_num == OPA_TRAP_BAD_P_KEY)
187 		ibp->rvp.pkey_violations++;
188 	else
189 		ibp->rvp.qkey_violations++;
190 	ibp->rvp.n_pkt_drops++;
191 
192 	/* Send violation trap */
193 	data.generic_type = IB_NOTICE_TYPE_SECURITY;
194 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
195 	data.trap_num = trap_num;
196 	data.issuer_lid = cpu_to_be32(lid);
197 	data.ntc_257_258.lid1 = cpu_to_be32(_lid1);
198 	data.ntc_257_258.lid2 = cpu_to_be32(_lid2);
199 	data.ntc_257_258.key = cpu_to_be32(key);
200 	data.ntc_257_258.sl = sl << 3;
201 	data.ntc_257_258.qp1 = cpu_to_be32(qp1);
202 	data.ntc_257_258.qp2 = cpu_to_be32(qp2);
203 
204 	send_trap(ibp, &data, sizeof(data));
205 }
206 
207 /*
208  * Send a bad M_Key trap (ch. 14.3.9).
209  */
210 static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
211 		     __be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt)
212 {
213 	struct opa_mad_notice_attr data;
214 	u32 lid = ppd_from_ibp(ibp)->lid;
215 
216 	memset(&data, 0, sizeof(data));
217 	/* Send violation trap */
218 	data.generic_type = IB_NOTICE_TYPE_SECURITY;
219 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
220 	data.trap_num = OPA_TRAP_BAD_M_KEY;
221 	data.issuer_lid = cpu_to_be32(lid);
222 	data.ntc_256.lid = data.issuer_lid;
223 	data.ntc_256.method = mad->method;
224 	data.ntc_256.attr_id = mad->attr_id;
225 	data.ntc_256.attr_mod = mad->attr_mod;
226 	data.ntc_256.mkey = mkey;
227 	if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
228 		data.ntc_256.dr_slid = dr_slid;
229 		data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
230 		if (hop_cnt > ARRAY_SIZE(data.ntc_256.dr_rtn_path)) {
231 			data.ntc_256.dr_trunc_hop |=
232 				IB_NOTICE_TRAP_DR_TRUNC;
233 			hop_cnt = ARRAY_SIZE(data.ntc_256.dr_rtn_path);
234 		}
235 		data.ntc_256.dr_trunc_hop |= hop_cnt;
236 		memcpy(data.ntc_256.dr_rtn_path, return_path,
237 		       hop_cnt);
238 	}
239 
240 	send_trap(ibp, &data, sizeof(data));
241 }
242 
243 /*
244  * Send a Port Capability Mask Changed trap (ch. 14.3.11).
245  */
246 void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
247 {
248 	struct opa_mad_notice_attr data;
249 	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
250 	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
251 	struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
252 	u32 lid = ppd_from_ibp(ibp)->lid;
253 
254 	memset(&data, 0, sizeof(data));
255 
256 	data.generic_type = IB_NOTICE_TYPE_INFO;
257 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
258 	data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
259 	data.issuer_lid = cpu_to_be32(lid);
260 	data.ntc_144.lid = data.issuer_lid;
261 	data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
262 
263 	send_trap(ibp, &data, sizeof(data));
264 }
265 
266 /*
267  * Send a System Image GUID Changed trap (ch. 14.3.12).
268  */
269 void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
270 {
271 	struct opa_mad_notice_attr data;
272 	u32 lid = ppd_from_ibp(ibp)->lid;
273 
274 	memset(&data, 0, sizeof(data));
275 
276 	data.generic_type = IB_NOTICE_TYPE_INFO;
277 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
278 	data.trap_num = OPA_TRAP_CHANGE_SYSGUID;
279 	data.issuer_lid = cpu_to_be32(lid);
280 	data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
281 	data.ntc_145.lid = data.issuer_lid;
282 
283 	send_trap(ibp, &data, sizeof(data));
284 }
285 
286 /*
287  * Send a Node Description Changed trap (ch. 14.3.13).
288  */
289 void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
290 {
291 	struct opa_mad_notice_attr data;
292 	u32 lid = ppd_from_ibp(ibp)->lid;
293 
294 	memset(&data, 0, sizeof(data));
295 
296 	data.generic_type = IB_NOTICE_TYPE_INFO;
297 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
298 	data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
299 	data.issuer_lid = cpu_to_be32(lid);
300 	data.ntc_144.lid = data.issuer_lid;
301 	data.ntc_144.change_flags =
302 		cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG);
303 
304 	send_trap(ibp, &data, sizeof(data));
305 }
306 
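/* OPA Get(NodeDescription): the attribute modifier must be zero */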
307 static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
308 				   u8 *data, struct ib_device *ibdev,
309 				   u8 port, u32 *resp_len)
310 {
311 	struct opa_node_description *nd;
312 
313 	if (am) {
314 		smp->status |= IB_SMP_INVALID_FIELD;
315 		return reply((struct ib_mad_hdr *)smp);
316 	}
317 
318 	nd = (struct opa_node_description *)data;
319 
320 	memcpy(nd->data, ibdev->node_desc, sizeof(nd->data));
321 
322 	if (resp_len)
323 		*resp_len += sizeof(*nd);
324 
325 	return reply((struct ib_mad_hdr *)smp);
326 }
327 
328 static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
329 				   struct ib_device *ibdev, u8 port,
330 				   u32 *resp_len)
331 {
332 	struct opa_node_info *ni;
333 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
334 	unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */
335 
336 	ni = (struct opa_node_info *)data;
337 
338 	/* GUID 0 is illegal */
339 	if (am || pidx >= dd->num_pports || dd->pport[pidx].guid == 0) {
340 		smp->status |= IB_SMP_INVALID_FIELD;
341 		return reply((struct ib_mad_hdr *)smp);
342 	}
343 
344 	ni->port_guid = cpu_to_be64(dd->pport[pidx].guid);
345 	ni->base_version = OPA_MGMT_BASE_VERSION;
346 	ni->class_version = OPA_SMI_CLASS_VERSION;
347 	ni->node_type = 1;     /* channel adapter */
348 	ni->num_ports = ibdev->phys_port_cnt;
349 	/* This is already in network order */
350 	ni->system_image_guid = ib_hfi1_sys_image_guid;
351 	/* Use first-port GUID as node GUID */
352 	ni->node_guid = cpu_to_be64(dd->pport->guid);
353 	ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
354 	ni->device_id = cpu_to_be16(dd->pcidev->device);
355 	ni->revision = cpu_to_be32(dd->minrev);
356 	ni->local_port_num = port;
357 	ni->vendor_id[0] = dd->oui1;
358 	ni->vendor_id[1] = dd->oui2;
359 	ni->vendor_id[2] = dd->oui3;
360 
361 	if (resp_len)
362 		*resp_len += sizeof(*ni);
363 
364 	return reply((struct ib_mad_hdr *)smp);
365 }
366 
367 static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
368 			     u8 port)
369 {
370 	struct ib_node_info *nip = (struct ib_node_info *)&smp->data;
371 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
372 	unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */
373 
374 	/* GUID 0 is illegal */
375 	if (smp->attr_mod || pidx >= dd->num_pports ||
376 	    dd->pport[pidx].guid == 0)
377 		smp->status |= IB_SMP_INVALID_FIELD;
378 	else
379 		nip->port_guid = cpu_to_be64(dd->pport[pidx].guid);
380 
381 	nip->base_version = OPA_MGMT_BASE_VERSION;
382 	nip->class_version = OPA_SMI_CLASS_VERSION;
383 	nip->node_type = 1;     /* channel adapter */
384 	nip->num_ports = ibdev->phys_port_cnt;
385 	/* This is already in network order */
386 	nip->sys_guid = ib_hfi1_sys_image_guid;
387 	/* Use first-port GUID as node GUID */
388 	nip->node_guid = cpu_to_be64(dd->pport->guid);
389 	nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
390 	nip->device_id = cpu_to_be16(dd->pcidev->device);
391 	nip->revision = cpu_to_be32(dd->minrev);
392 	nip->local_port_num = port;
393 	nip->vendor_id[0] = dd->oui1;
394 	nip->vendor_id[1] = dd->oui2;
395 	nip->vendor_id[2] = dd->oui3;
396 
397 	return reply((struct ib_mad_hdr *)smp);
398 }
399 
400 static void set_link_width_enabled(struct hfi1_pportdata *ppd, u32 w)
401 {
402 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_ENB, w);
403 }
404 
405 static void set_link_width_downgrade_enabled(struct hfi1_pportdata *ppd, u32 w)
406 {
407 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_DG_ENB, w);
408 }
409 
410 static void set_link_speed_enabled(struct hfi1_pportdata *ppd, u32 s)
411 {
412 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_SPD_ENB, s);
413 }
414 
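/*
 * Validate the M_Key of an incoming SMP against the port's M_Key.
 * Returns 1 if the MAD must be dropped because of an M_Key violation
 * (the violation is counted and a bad M_Key trap is generated),
 * 0 if processing may continue.
 */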
415 static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
416 		      int mad_flags, __be64 mkey, __be32 dr_slid,
417 		      u8 return_path[], u8 hop_cnt)
418 {
419 	int valid_mkey = 0;
420 	int ret = 0;
421 
422 	/* Is the mkey in the process of expiring? */
423 	if (ibp->rvp.mkey_lease_timeout &&
424 	    time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
425 		/* Clear timeout and mkey protection field. */
426 		ibp->rvp.mkey_lease_timeout = 0;
427 		ibp->rvp.mkeyprot = 0;
428 	}
429 
430 	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
431 	    ibp->rvp.mkey == mkey)
432 		valid_mkey = 1;
433 
434 	/* Unset lease timeout on any valid Get/Set/TrapRepress */
435 	if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
436 	    (mad->method == IB_MGMT_METHOD_GET ||
437 	     mad->method == IB_MGMT_METHOD_SET ||
438 	     mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
439 		ibp->rvp.mkey_lease_timeout = 0;
440 
441 	if (!valid_mkey) {
442 		switch (mad->method) {
443 		case IB_MGMT_METHOD_GET:
444 			/* Bad mkey not a violation below level 2 */
445 			if (ibp->rvp.mkeyprot < 2)
446 				break;
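			/* fall through - a Get with mkeyprot >= 2 is also a violation */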
447 		case IB_MGMT_METHOD_SET:
448 		case IB_MGMT_METHOD_TRAP_REPRESS:
449 			if (ibp->rvp.mkey_violations != 0xFFFF)
450 				++ibp->rvp.mkey_violations;
451 			if (!ibp->rvp.mkey_lease_timeout &&
452 			    ibp->rvp.mkey_lease_period)
453 				ibp->rvp.mkey_lease_timeout = jiffies +
454 					ibp->rvp.mkey_lease_period * HZ;
455 			/* Generate a trap notice. */
456 			bad_mkey(ibp, mad, mkey, dr_slid, return_path,
457 				 hop_cnt);
458 			ret = 1;
459 		}
460 	}
461 
462 	return ret;
463 }
464 
465 /*
466  * The SMA caches reads from LCB registers in case the LCB is unavailable.
467  * (The LCB is unavailable in certain link states, for example.)
468  */
469 struct lcb_datum {
470 	u32 off;
471 	u64 val;
472 };
473 
474 static struct lcb_datum lcb_cache[] = {
475 	{ DC_LCB_STS_ROUND_TRIP_LTP_CNT, 0 },
476 };
477 
478 static int write_lcb_cache(u32 off, u64 val)
479 {
480 	int i;
481 
482 	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
483 		if (lcb_cache[i].off == off) {
484 			lcb_cache[i].val = val;
485 			return 0;
486 		}
487 	}
488 
489 	pr_warn("%s bad offset 0x%x\n", __func__, off);
490 	return -1;
491 }
492 
493 static int read_lcb_cache(u32 off, u64 *val)
494 {
495 	int i;
496 
497 	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
498 		if (lcb_cache[i].off == off) {
499 			*val = lcb_cache[i].val;
500 			return 0;
501 		}
502 	}
503 
504 	pr_warn("%s bad offset 0x%x\n", __func__, off);
505 	return -1;
506 }
507 
508 void read_ltp_rtt(struct hfi1_devdata *dd)
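/*
 * Sample DC_LCB_STS_ROUND_TRIP_LTP_CNT and store it in the LCB cache so
 * the SMA can still report it when the LCB is unavailable.
 */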
509 {
510 	u64 reg;
511 
512 	if (read_lcb_csr(dd, DC_LCB_STS_ROUND_TRIP_LTP_CNT, &reg))
513 		dd_dev_err(dd, "%s: unable to read LTP RTT\n", __func__);
514 	else
515 		write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
516 }
517 
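/* OPA Get(PortInfo): the attribute modifier must select exactly one port */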
518 static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
519 				   struct ib_device *ibdev, u8 port,
520 				   u32 *resp_len)
521 {
522 	int i;
523 	struct hfi1_devdata *dd;
524 	struct hfi1_pportdata *ppd;
525 	struct hfi1_ibport *ibp;
526 	struct opa_port_info *pi = (struct opa_port_info *)data;
527 	u8 mtu;
528 	u8 credit_rate;
529 	u8 is_beaconing_active;
530 	u32 state;
531 	u32 num_ports = OPA_AM_NPORT(am);
532 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
533 	u32 buffer_units;
534 	u64 tmp = 0;
535 
536 	if (num_ports != 1) {
537 		smp->status |= IB_SMP_INVALID_FIELD;
538 		return reply((struct ib_mad_hdr *)smp);
539 	}
540 
541 	dd = dd_from_ibdev(ibdev);
542 	/* IB numbers ports from 1, hw from 0 */
543 	ppd = dd->pport + (port - 1);
544 	ibp = &ppd->ibport_data;
545 
546 	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
547 	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
548 		smp->status |= IB_SMP_INVALID_FIELD;
549 		return reply((struct ib_mad_hdr *)smp);
550 	}
551 
552 	pi->lid = cpu_to_be32(ppd->lid);
553 
554 	/* Only return the mkey if the protection field allows it. */
555 	if (!(smp->method == IB_MGMT_METHOD_GET &&
556 	      ibp->rvp.mkey != smp->mkey &&
557 	      ibp->rvp.mkeyprot == 1))
558 		pi->mkey = ibp->rvp.mkey;
559 
560 	pi->subnet_prefix = ibp->rvp.gid_prefix;
561 	pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
562 	pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
563 	pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
564 	pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
565 	pi->sa_qp = cpu_to_be32(ppd->sa_qp);
566 
567 	pi->link_width.enabled = cpu_to_be16(ppd->link_width_enabled);
568 	pi->link_width.supported = cpu_to_be16(ppd->link_width_supported);
569 	pi->link_width.active = cpu_to_be16(ppd->link_width_active);
570 
571 	pi->link_width_downgrade.supported =
572 			cpu_to_be16(ppd->link_width_downgrade_supported);
573 	pi->link_width_downgrade.enabled =
574 			cpu_to_be16(ppd->link_width_downgrade_enabled);
575 	pi->link_width_downgrade.tx_active =
576 			cpu_to_be16(ppd->link_width_downgrade_tx_active);
577 	pi->link_width_downgrade.rx_active =
578 			cpu_to_be16(ppd->link_width_downgrade_rx_active);
579 
580 	pi->link_speed.supported = cpu_to_be16(ppd->link_speed_supported);
581 	pi->link_speed.active = cpu_to_be16(ppd->link_speed_active);
582 	pi->link_speed.enabled = cpu_to_be16(ppd->link_speed_enabled);
583 
584 	state = driver_lstate(ppd);
585 
586 	if (start_of_sm_config && (state == IB_PORT_INIT))
587 		ppd->is_sm_config_started = 1;
588 
589 	pi->port_phys_conf = (ppd->port_type & 0xf);
590 
591 #if PI_LED_ENABLE_SUP
592 	pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
593 	pi->port_states.ledenable_offlinereason |=
594 		ppd->is_sm_config_started << 5;
595 	/*
596 	 * This pairs with the memory barrier in hfi1_start_led_override to
597 	 * ensure that we read the correct state of LED beaconing represented
598 	 * by led_override_timer_active
599 	 */
600 	smp_rmb();
601 	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
602 	pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
603 	pi->port_states.ledenable_offlinereason |=
604 		ppd->offline_disabled_reason;
605 #else
606 	pi->port_states.offline_reason = ppd->neighbor_normal << 4;
607 	pi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
608 	pi->port_states.offline_reason |= ppd->offline_disabled_reason;
609 #endif /* PI_LED_ENABLE_SUP */
610 
611 	pi->port_states.portphysstate_portstate =
612 		(hfi1_ibphys_portstate(ppd) << 4) | state;
613 
614 	pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
615 
616 	memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
617 	for (i = 0; i < ppd->vls_supported; i++) {
618 		mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
619 		if ((i % 2) == 0)
620 			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
621 		else
622 			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
623 	}
624 	/* don't forget VL 15 */
625 	mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
626 	pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
627 	pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
628 	pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
629 	pi->partenforce_filterraw |=
630 		(ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
631 	if (ppd->part_enforce & HFI1_PART_ENFORCE_IN)
632 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
633 	if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
634 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
635 	pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
636 	/* P_KeyViolations are counted by hardware. */
637 	pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
638 	pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
639 
640 	pi->vl.cap = ppd->vls_supported;
641 	pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
642 	pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
643 	pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);
644 
645 	pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;
646 
647 	pi->port_link_mode  = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
648 					  OPA_PORT_LINK_MODE_OPA << 5 |
649 					  OPA_PORT_LINK_MODE_OPA);
650 
651 	pi->port_ltp_crc_mode = cpu_to_be16(ppd->port_ltp_crc_mode);
652 
653 	pi->port_mode = cpu_to_be16(
654 				ppd->is_active_optimize_enabled ?
655 					OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);
656 
657 	pi->port_packet_format.supported =
658 		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
659 	pi->port_packet_format.enabled =
660 		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
661 
662 	/* flit_control.interleave is defined as follows (OPA V1, version .76):
663 	 * bits		use
664 	 * ----		---
665 	 * 2		res
666 	 * 2		DistanceSupported
667 	 * 2		DistanceEnabled
668 	 * 5		MaxNestLevelTxEnabled
669 	 * 5		MaxNestLevelRxSupported
670 	 *
671 	 * HFI supports only "distance mode 1" (see OPA V1, version .76,
672 	 * section 9.6.2), so set DistanceSupported, DistanceEnabled
673 	 * to 0x1.
674 	 */
675 	pi->flit_control.interleave = cpu_to_be16(0x1400);
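	/* 0x1400, read against the field layout above: DistanceSupported =
	 * DistanceEnabled = 0x1, both MaxNestLevel fields 0.
	 */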
676 
677 	pi->link_down_reason = ppd->local_link_down_reason.sma;
678 	pi->neigh_link_down_reason = ppd->neigh_link_down_reason.sma;
679 	pi->port_error_action = cpu_to_be32(ppd->port_error_action);
680 	pi->mtucap = mtu_to_enum(hfi1_max_mtu, IB_MTU_4096);
681 
682 	/* 32.768 usec. response time (guessing) */
683 	pi->resptimevalue = 3;
684 
685 	pi->local_port_num = port;
686 
687 	/* buffer info for FM */
688 	pi->overall_buffer_space = cpu_to_be16(dd->link_credits);
689 
690 	pi->neigh_node_guid = cpu_to_be64(ppd->neighbor_guid);
691 	pi->neigh_port_num = ppd->neighbor_port_number;
692 	pi->port_neigh_mode =
693 		(ppd->neighbor_type & OPA_PI_MASK_NEIGH_NODE_TYPE) |
694 		(ppd->mgmt_allowed ? OPA_PI_MASK_NEIGH_MGMT_ALLOWED : 0) |
695 		(ppd->neighbor_fm_security ?
696 			OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS : 0);
697 
698 	/* HFIs shall always return VL15 credits to their
699 	 * neighbor in a timely manner, without any credit return pacing.
700 	 */
701 	credit_rate = 0;
702 	buffer_units  = (dd->vau) & OPA_PI_MASK_BUF_UNIT_BUF_ALLOC;
703 	buffer_units |= (dd->vcu << 3) & OPA_PI_MASK_BUF_UNIT_CREDIT_ACK;
704 	buffer_units |= (credit_rate << 6) &
705 				OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE;
706 	buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
707 	pi->buffer_units = cpu_to_be32(buffer_units);
708 
709 	pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported);
710 
711 	/* HFI supports a replay buffer 128 LTPs in size */
712 	pi->replay_depth.buffer = 0x80;
713 	/* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
714 	read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);
715 
716 	/*
717 	 * this counter is 16 bits wide, but the replay_depth.wire
718 	 * variable is only 8 bits
719 	 */
720 	if (tmp > 0xff)
721 		tmp = 0xff;
722 	pi->replay_depth.wire = tmp;
723 
724 	if (resp_len)
725 		*resp_len += sizeof(struct opa_port_info);
726 
727 	return reply((struct ib_mad_hdr *)smp);
728 }
729 
730 /**
731  * get_pkeys - return the PKEY table
732  * @dd: the hfi1_ib device
733  * @port: the IB port number
734  * @pkeys: the pkey table is placed here
735  */
736 static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
737 {
738 	struct hfi1_pportdata *ppd = dd->pport + port - 1;
739 
740 	memcpy(pkeys, ppd->pkeys, sizeof(ppd->pkeys));
741 
742 	return 0;
743 }
744 
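/*
 * OPA Get(PartitionTable): OPA_AM_NBLK(am) gives the number of pkey blocks
 * requested and the low 11 bits of the AM give the starting block number.
 */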
745 static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
746 				    struct ib_device *ibdev, u8 port,
747 				    u32 *resp_len)
748 {
749 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
750 	u32 n_blocks_req = OPA_AM_NBLK(am);
751 	u32 start_block = am & 0x7ff;
752 	__be16 *p;
753 	u16 *q;
754 	int i;
755 	u16 n_blocks_avail;
756 	unsigned npkeys = hfi1_get_npkeys(dd);
757 	size_t size;
758 
759 	if (n_blocks_req == 0) {
760 		pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
761 			port, start_block, n_blocks_req);
762 		smp->status |= IB_SMP_INVALID_FIELD;
763 		return reply((struct ib_mad_hdr *)smp);
764 	}
765 
766 	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
767 
768 	size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
769 
770 	if (start_block + n_blocks_req > n_blocks_avail ||
771 	    n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
772 		pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
774 			start_block, n_blocks_req, n_blocks_avail,
775 			OPA_NUM_PKEY_BLOCKS_PER_SMP);
776 		smp->status |= IB_SMP_INVALID_FIELD;
777 		return reply((struct ib_mad_hdr *)smp);
778 	}
779 
780 	p = (__be16 *)data;
781 	q = (u16 *)data;
782 	/* get the real pkeys if we are requesting the first block */
783 	if (start_block == 0) {
784 		get_pkeys(dd, port, q);
785 		for (i = 0; i < npkeys; i++)
786 			p[i] = cpu_to_be16(q[i]);
787 		if (resp_len)
788 			*resp_len += size;
789 	} else {
790 		smp->status |= IB_SMP_INVALID_FIELD;
791 	}
792 	return reply((struct ib_mad_hdr *)smp);
793 }
794 
795 enum {
796 	HFI_TRANSITION_DISALLOWED,
797 	HFI_TRANSITION_IGNORED,
798 	HFI_TRANSITION_ALLOWED,
799 	HFI_TRANSITION_UNDEFINED,
800 };
801 
802 /*
803  * Use shortened names to improve readability of
804  * {logical,physical}_state_transitions
805  */
806 enum {
807 	__D = HFI_TRANSITION_DISALLOWED,
808 	__I = HFI_TRANSITION_IGNORED,
809 	__A = HFI_TRANSITION_ALLOWED,
810 	__U = HFI_TRANSITION_UNDEFINED,
811 };
812 
813 /*
814  * IB_PORTPHYSSTATE_POLLING (2) through OPA_PORTPHYSSTATE_MAX (11) are
815  * represented in physical_state_transitions.
816  */
817 #define __N_PHYSTATES (OPA_PORTPHYSSTATE_MAX - IB_PORTPHYSSTATE_POLLING + 1)
818 
819 /*
820  * Within physical_state_transitions, rows represent "old" states,
821  * columns "new" states, and physical_state_transitions.allowed[old][new]
822  * indicates if the transition from old state to new state is legal (see
823  * OPAg1v1, Table 6-4).
824  */
825 static const struct {
826 	u8 allowed[__N_PHYSTATES][__N_PHYSTATES];
827 } physical_state_transitions = {
828 	{
829 		/* 2    3    4    5    6    7    8    9   10   11 */
830 	/* 2 */	{ __A, __A, __D, __D, __D, __D, __D, __D, __D, __D },
831 	/* 3 */	{ __A, __I, __D, __D, __D, __D, __D, __D, __D, __A },
832 	/* 4 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
833 	/* 5 */	{ __A, __A, __D, __I, __D, __D, __D, __D, __D, __D },
834 	/* 6 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
835 	/* 7 */	{ __D, __A, __D, __D, __D, __I, __D, __D, __D, __D },
836 	/* 8 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
837 	/* 9 */	{ __I, __A, __D, __D, __D, __D, __D, __I, __D, __D },
838 	/*10 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
839 	/*11 */	{ __D, __A, __D, __D, __D, __D, __D, __D, __D, __I },
840 	}
841 };
842 
843 /*
844  * IB_PORT_DOWN (1) through IB_PORT_ACTIVE_DEFER (5) are represented
845  * in logical_state_transitions.
846  */
847 
848 #define __N_LOGICAL_STATES (IB_PORT_ACTIVE_DEFER - IB_PORT_DOWN + 1)
849 
850 /*
851  * Within logical_state_transitions rows represent "old" states,
852  * columns "new" states, and logical_state_transitions.allowed[old][new]
853  * indicates if the transition from old state to new state is legal (see
854  * OPAg1v1, Table 9-12).
855  */
856 static const struct {
857 	u8 allowed[__N_LOGICAL_STATES][__N_LOGICAL_STATES];
858 } logical_state_transitions = {
859 	{
860 		/* 1    2    3    4    5 */
861 	/* 1 */	{ __I, __D, __D, __D, __U},
862 	/* 2 */	{ __D, __I, __A, __D, __U},
863 	/* 3 */	{ __D, __D, __I, __A, __U},
864 	/* 4 */	{ __D, __D, __I, __I, __U},
865 	/* 5 */	{ __U, __U, __U, __U, __U},
866 	}
867 };
868 
869 static int logical_transition_allowed(int old, int new)
870 {
871 	if (old < IB_PORT_NOP || old > IB_PORT_ACTIVE_DEFER ||
872 	    new < IB_PORT_NOP || new > IB_PORT_ACTIVE_DEFER) {
873 		pr_warn("invalid logical state(s) (old %d new %d)\n",
874 			old, new);
875 		return HFI_TRANSITION_UNDEFINED;
876 	}
877 
878 	if (new == IB_PORT_NOP)
879 		return HFI_TRANSITION_ALLOWED; /* always allowed */
880 
881 	/* adjust states for indexing into logical_state_transitions */
882 	old -= IB_PORT_DOWN;
883 	new -= IB_PORT_DOWN;
884 
885 	if (old < 0 || new < 0)
886 		return HFI_TRANSITION_UNDEFINED;
887 	return logical_state_transitions.allowed[old][new];
888 }
889 
890 static int physical_transition_allowed(int old, int new)
891 {
892 	if (old < IB_PORTPHYSSTATE_NOP || old > OPA_PORTPHYSSTATE_MAX ||
893 	    new < IB_PORTPHYSSTATE_NOP || new > OPA_PORTPHYSSTATE_MAX) {
894 		pr_warn("invalid physical state(s) (old %d new %d)\n",
895 			old, new);
896 		return HFI_TRANSITION_UNDEFINED;
897 	}
898 
899 	if (new == IB_PORTPHYSSTATE_NOP)
900 		return HFI_TRANSITION_ALLOWED; /* always allowed */
901 
902 	/* adjust states for indexing into physical_state_transitions */
903 	old -= IB_PORTPHYSSTATE_POLLING;
904 	new -= IB_PORTPHYSSTATE_POLLING;
905 
906 	if (old < 0 || new < 0)
907 		return HFI_TRANSITION_UNDEFINED;
908 	return physical_state_transitions.allowed[old][new];
909 }
910 
911 static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
912 					  u32 logical_new, u32 physical_new)
913 {
914 	u32 physical_old = driver_physical_state(ppd);
915 	u32 logical_old = driver_logical_state(ppd);
916 	int ret, logical_allowed, physical_allowed;
917 
918 	ret = logical_transition_allowed(logical_old, logical_new);
919 	logical_allowed = ret;
920 
921 	if (ret == HFI_TRANSITION_DISALLOWED ||
922 	    ret == HFI_TRANSITION_UNDEFINED) {
923 		pr_warn("invalid logical state transition %s -> %s\n",
924 			opa_lstate_name(logical_old),
925 			opa_lstate_name(logical_new));
926 		return ret;
927 	}
928 
929 	ret = physical_transition_allowed(physical_old, physical_new);
930 	physical_allowed = ret;
931 
932 	if (ret == HFI_TRANSITION_DISALLOWED ||
933 	    ret == HFI_TRANSITION_UNDEFINED) {
934 		pr_warn("invalid physical state transition %s -> %s\n",
935 			opa_pstate_name(physical_old),
936 			opa_pstate_name(physical_new));
937 		return ret;
938 	}
939 
940 	if (logical_allowed == HFI_TRANSITION_IGNORED &&
941 	    physical_allowed == HFI_TRANSITION_IGNORED)
942 		return HFI_TRANSITION_IGNORED;
943 
944 	/*
945 	 * A change request of Physical Port State from
946 	 * 'Offline' to 'Polling' should be ignored.
947 	 */
948 	if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
949 	    (physical_new == IB_PORTPHYSSTATE_POLLING))
950 		return HFI_TRANSITION_IGNORED;
951 
952 	/*
953 	 * Either physical_allowed or logical_allowed is
954 	 * HFI_TRANSITION_ALLOWED.
955 	 */
956 	return HFI_TRANSITION_ALLOWED;
957 }
958 
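/*
 * Apply the logical/physical port state requested by Set(PortInfo) or
 * Set(PortStateInfo).  Returns 0, or a MAD result that consumes the reply
 * when the response would otherwise go out the port that was just disabled.
 */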
959 static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
960 			   u32 logical_state, u32 phys_state,
961 			   int suppress_idle_sma)
962 {
963 	struct hfi1_devdata *dd = ppd->dd;
964 	u32 link_state;
965 	int ret;
966 
967 	ret = port_states_transition_allowed(ppd, logical_state, phys_state);
968 	if (ret == HFI_TRANSITION_DISALLOWED ||
969 	    ret == HFI_TRANSITION_UNDEFINED) {
970 		/* error message emitted above */
971 		smp->status |= IB_SMP_INVALID_FIELD;
972 		return 0;
973 	}
974 
975 	if (ret == HFI_TRANSITION_IGNORED)
976 		return 0;
977 
978 	if ((phys_state != IB_PORTPHYSSTATE_NOP) &&
979 	    !(logical_state == IB_PORT_DOWN ||
980 	      logical_state == IB_PORT_NOP)){
981 		pr_warn("SubnSet(OPA_PortInfo) port state invalid: logical_state 0x%x physical_state 0x%x\n",
982 			logical_state, phys_state);
983 		smp->status |= IB_SMP_INVALID_FIELD;
984 	}
985 
986 	/*
987 	 * Logical state changes are summarized in OPAv1g1 spec.,
988 	 * Table 9-12; physical state changes are summarized in
989 	 * OPAv1g1 spec., Table 6.4.
990 	 */
991 	switch (logical_state) {
992 	case IB_PORT_NOP:
993 		if (phys_state == IB_PORTPHYSSTATE_NOP)
994 			break;
995 		/* FALLTHROUGH */
996 	case IB_PORT_DOWN:
997 		if (phys_state == IB_PORTPHYSSTATE_NOP) {
998 			link_state = HLS_DN_DOWNDEF;
999 		} else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
1000 			link_state = HLS_DN_POLL;
1001 			set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
1002 					     0, OPA_LINKDOWN_REASON_FM_BOUNCE);
1003 		} else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
1004 			link_state = HLS_DN_DISABLE;
1005 		} else {
1006 			pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
1007 				phys_state);
1008 			smp->status |= IB_SMP_INVALID_FIELD;
1009 			break;
1010 		}
1011 
1012 		if ((link_state == HLS_DN_POLL ||
1013 		     link_state == HLS_DN_DOWNDEF)) {
1014 			/*
1015 			 * Going to poll.  No matter what the current state,
1016 			 * always move offline first, then tune and start the
1017 			 * link.  This correctly handles a FM link bounce and
1018 			 * a link enable.  Going offline is a no-op if already
1019 			 * offline.
1020 			 */
1021 			set_link_state(ppd, HLS_DN_OFFLINE);
1022 			tune_serdes(ppd);
1023 			start_link(ppd);
1024 		} else {
1025 			set_link_state(ppd, link_state);
1026 		}
1027 		if (link_state == HLS_DN_DISABLE &&
1028 		    (ppd->offline_disabled_reason >
1029 		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
1030 		     ppd->offline_disabled_reason ==
1031 		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
1032 			ppd->offline_disabled_reason =
1033 			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
1034 		/*
1035 		 * Don't send a reply if the response would be sent
1036 		 * through the disabled port.
1037 		 */
1038 		if (link_state == HLS_DN_DISABLE && smp->hop_cnt)
1039 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
1040 		break;
1041 	case IB_PORT_ARMED:
1042 		ret = set_link_state(ppd, HLS_UP_ARMED);
1043 		if ((ret == 0) && (suppress_idle_sma == 0))
1044 			send_idle_sma(dd, SMA_IDLE_ARM);
1045 		break;
1046 	case IB_PORT_ACTIVE:
1047 		if (ppd->neighbor_normal) {
1048 			ret = set_link_state(ppd, HLS_UP_ACTIVE);
1049 			if (ret == 0)
1050 				send_idle_sma(dd, SMA_IDLE_ACTIVE);
1051 		} else {
1052 			pr_warn("SubnSet(OPA_PortInfo) Cannot move to Active with NeighborNormal 0\n");
1053 			smp->status |= IB_SMP_INVALID_FIELD;
1054 		}
1055 		break;
1056 	default:
1057 		pr_warn("SubnSet(OPA_PortInfo) invalid logical state 0x%x\n",
1058 			logical_state);
1059 		smp->status |= IB_SMP_INVALID_FIELD;
1060 	}
1061 
1062 	return 0;
1063 }
1064 
1065 /**
1066  * __subn_set_opa_portinfo - set port information
1067  * @smp: the incoming SM packet
1068  * @ibdev: the infiniband device
1069  * @port: the port on the device
1070  *
1071  */
1072 static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
1073 				   struct ib_device *ibdev, u8 port,
1074 				   u32 *resp_len)
1075 {
1076 	struct opa_port_info *pi = (struct opa_port_info *)data;
1077 	struct ib_event event;
1078 	struct hfi1_devdata *dd;
1079 	struct hfi1_pportdata *ppd;
1080 	struct hfi1_ibport *ibp;
1081 	u8 clientrereg;
1082 	unsigned long flags;
1083 	u32 smlid, opa_lid; /* tmp vars to hold LID values */
1084 	u16 lid;
1085 	u8 ls_old, ls_new, ps_new;
1086 	u8 vls;
1087 	u8 msl;
1088 	u8 crc_enabled;
1089 	u16 lse, lwe, mtu;
1090 	u32 num_ports = OPA_AM_NPORT(am);
1091 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1092 	int ret, i, invalid = 0, call_set_mtu = 0;
1093 	int call_link_downgrade_policy = 0;
1094 
1095 	if (num_ports != 1) {
1096 		smp->status |= IB_SMP_INVALID_FIELD;
1097 		return reply((struct ib_mad_hdr *)smp);
1098 	}
1099 
1100 	opa_lid = be32_to_cpu(pi->lid);
1101 	if (opa_lid & 0xFFFF0000) {
1102 		pr_warn("OPA_PortInfo lid out of range: %X\n", opa_lid);
1103 		smp->status |= IB_SMP_INVALID_FIELD;
1104 		goto get_only;
1105 	}
1106 
1107 	lid = (u16)(opa_lid & 0x0000FFFF);
1108 
1109 	smlid = be32_to_cpu(pi->sm_lid);
1110 	if (smlid & 0xFFFF0000) {
1111 		pr_warn("OPA_PortInfo SM lid out of range: %X\n", smlid);
1112 		smp->status |= IB_SMP_INVALID_FIELD;
1113 		goto get_only;
1114 	}
1115 	smlid &= 0x0000FFFF;
1116 
1117 	clientrereg = (pi->clientrereg_subnettimeout &
1118 			OPA_PI_MASK_CLIENT_REREGISTER);
1119 
1120 	dd = dd_from_ibdev(ibdev);
1121 	/* IB numbers ports from 1, hw from 0 */
1122 	ppd = dd->pport + (port - 1);
1123 	ibp = &ppd->ibport_data;
1124 	event.device = ibdev;
1125 	event.element.port_num = port;
1126 
1127 	ls_old = driver_lstate(ppd);
1128 
1129 	ibp->rvp.mkey = pi->mkey;
1130 	ibp->rvp.gid_prefix = pi->subnet_prefix;
1131 	ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
1132 
1133 	/* Must be a valid unicast LID address. */
1134 	if ((lid == 0 && ls_old > IB_PORT_INIT) ||
1135 	    lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
1136 		smp->status |= IB_SMP_INVALID_FIELD;
1137 		pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
1138 			lid);
1139 	} else if (ppd->lid != lid ||
1140 		 ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC)) {
1141 		if (ppd->lid != lid)
1142 			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LID_CHANGE_BIT);
1143 		if (ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC))
1144 			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LMC_CHANGE_BIT);
1145 		hfi1_set_lid(ppd, lid, pi->mkeyprotect_lmc & OPA_PI_MASK_LMC);
1146 		event.event = IB_EVENT_LID_CHANGE;
1147 		ib_dispatch_event(&event);
1148 	}
1149 
1150 	msl = pi->smsl & OPA_PI_MASK_SMSL;
1151 	if (pi->partenforce_filterraw & OPA_PI_MASK_LINKINIT_REASON)
1152 		ppd->linkinit_reason =
1153 			(pi->partenforce_filterraw &
1154 			 OPA_PI_MASK_LINKINIT_REASON);
1155 	/* enable/disable SW pkey checking as per FM control */
1156 	if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_IN)
1157 		ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
1158 	else
1159 		ppd->part_enforce &= ~HFI1_PART_ENFORCE_IN;
1160 
1161 	if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_OUT)
1162 		ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
1163 	else
1164 		ppd->part_enforce &= ~HFI1_PART_ENFORCE_OUT;
1165 
1166 	/* Must be a valid unicast LID address. */
1167 	if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
1168 	    smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
1169 		smp->status |= IB_SMP_INVALID_FIELD;
1170 		pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
1171 	} else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
1172 		pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
1173 		spin_lock_irqsave(&ibp->rvp.lock, flags);
1174 		if (ibp->rvp.sm_ah) {
1175 			if (smlid != ibp->rvp.sm_lid)
1176 				ibp->rvp.sm_ah->attr.dlid = smlid;
1177 			if (msl != ibp->rvp.sm_sl)
1178 				ibp->rvp.sm_ah->attr.sl = msl;
1179 		}
1180 		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
1181 		if (smlid != ibp->rvp.sm_lid)
1182 			ibp->rvp.sm_lid = smlid;
1183 		if (msl != ibp->rvp.sm_sl)
1184 			ibp->rvp.sm_sl = msl;
1185 		event.event = IB_EVENT_SM_CHANGE;
1186 		ib_dispatch_event(&event);
1187 	}
1188 
1189 	if (pi->link_down_reason == 0) {
1190 		ppd->local_link_down_reason.sma = 0;
1191 		ppd->local_link_down_reason.latest = 0;
1192 	}
1193 
1194 	if (pi->neigh_link_down_reason == 0) {
1195 		ppd->neigh_link_down_reason.sma = 0;
1196 		ppd->neigh_link_down_reason.latest = 0;
1197 	}
1198 
1199 	ppd->sm_trap_qp = be32_to_cpu(pi->sm_trap_qp);
1200 	ppd->sa_qp = be32_to_cpu(pi->sa_qp);
1201 
1202 	ppd->port_error_action = be32_to_cpu(pi->port_error_action);
1203 	lwe = be16_to_cpu(pi->link_width.enabled);
1204 	if (lwe) {
1205 		if (lwe == OPA_LINK_WIDTH_RESET ||
1206 		    lwe == OPA_LINK_WIDTH_RESET_OLD)
1207 			set_link_width_enabled(ppd, ppd->link_width_supported);
1208 		else if ((lwe & ~ppd->link_width_supported) == 0)
1209 			set_link_width_enabled(ppd, lwe);
1210 		else
1211 			smp->status |= IB_SMP_INVALID_FIELD;
1212 	}
1213 	lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
1214 	/* LWD.E is always applied - 0 means "disabled" */
1215 	if (lwe == OPA_LINK_WIDTH_RESET ||
1216 	    lwe == OPA_LINK_WIDTH_RESET_OLD) {
1217 		set_link_width_downgrade_enabled(ppd,
1218 						 ppd->link_width_downgrade_supported);
1221 	} else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
1222 		/* only set and apply if something changed */
1223 		if (lwe != ppd->link_width_downgrade_enabled) {
1224 			set_link_width_downgrade_enabled(ppd, lwe);
1225 			call_link_downgrade_policy = 1;
1226 		}
1227 	} else {
1228 		smp->status |= IB_SMP_INVALID_FIELD;
1229 	}
1230 	lse = be16_to_cpu(pi->link_speed.enabled);
1231 	if (lse) {
1232 		if (lse & be16_to_cpu(pi->link_speed.supported))
1233 			set_link_speed_enabled(ppd, lse);
1234 		else
1235 			smp->status |= IB_SMP_INVALID_FIELD;
1236 	}
1237 
1238 	ibp->rvp.mkeyprot =
1239 		(pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
1240 	ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
1241 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
1242 				    ibp->rvp.vl_high_limit);
1243 
1244 	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
1245 	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
1246 		smp->status |= IB_SMP_INVALID_FIELD;
1247 		return reply((struct ib_mad_hdr *)smp);
1248 	}
1249 	for (i = 0; i < ppd->vls_supported; i++) {
1250 		if ((i % 2) == 0)
1251 			mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
1252 					   4) & 0xF);
1253 		else
1254 			mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
1255 					  0xF);
1256 		if (mtu == 0xffff) {
1257 			pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
1258 				mtu,
1259 				(pi->neigh_mtu.pvlx_to_mtu[0] >> 4) & 0xF);
1260 			smp->status |= IB_SMP_INVALID_FIELD;
1261 			mtu = hfi1_max_mtu; /* use a valid MTU */
1262 		}
1263 		if (dd->vld[i].mtu != mtu) {
1264 			dd_dev_info(dd,
1265 				    "MTU change on vl %d from %d to %d\n",
1266 				    i, dd->vld[i].mtu, mtu);
1267 			dd->vld[i].mtu = mtu;
1268 			call_set_mtu++;
1269 		}
1270 	}
1271 	/* Per the OPAv1 spec, VL15 must support, and be configured for,
1272 	 * operation with an MTU of 2048 or larger.
1273 	 */
1274 	mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
1275 	if (mtu < 2048 || mtu == 0xffff)
1276 		mtu = 2048;
1277 	if (dd->vld[15].mtu != mtu) {
1278 		dd_dev_info(dd,
1279 			    "MTU change on vl 15 from %d to %d\n",
1280 			    dd->vld[15].mtu, mtu);
1281 		dd->vld[15].mtu = mtu;
1282 		call_set_mtu++;
1283 	}
1284 	if (call_set_mtu)
1285 		set_mtu(ppd);
1286 
1287 	/* Set operational VLs */
1288 	vls = pi->operational_vls & OPA_PI_MASK_OPERATIONAL_VL;
1289 	if (vls) {
1290 		if (vls > ppd->vls_supported) {
1291 			pr_warn("SubnSet(OPA_PortInfo) VL's supported invalid %d\n",
1292 				pi->operational_vls);
1293 			smp->status |= IB_SMP_INVALID_FIELD;
1294 		} else {
1295 			if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
1296 					    vls) == -EINVAL)
1297 				smp->status |= IB_SMP_INVALID_FIELD;
1298 		}
1299 	}
1300 
1301 	if (pi->mkey_violations == 0)
1302 		ibp->rvp.mkey_violations = 0;
1303 
1304 	if (pi->pkey_violations == 0)
1305 		ibp->rvp.pkey_violations = 0;
1306 
1307 	if (pi->qkey_violations == 0)
1308 		ibp->rvp.qkey_violations = 0;
1309 
1310 	ibp->rvp.subnet_timeout =
1311 		pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
1312 
1313 	crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
1314 	crc_enabled >>= 4;
1315 	crc_enabled &= 0xf;
1316 
1317 	if (crc_enabled != 0)
1318 		ppd->port_crc_mode_enabled = port_ltp_to_cap(crc_enabled);
1319 
1320 	ppd->is_active_optimize_enabled =
1321 			!!(be16_to_cpu(pi->port_mode)
1322 					& OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE);
1323 
1324 	ls_new = pi->port_states.portphysstate_portstate &
1325 			OPA_PI_MASK_PORT_STATE;
1326 	ps_new = (pi->port_states.portphysstate_portstate &
1327 			OPA_PI_MASK_PORT_PHYSICAL_STATE) >> 4;
1328 
1329 	if (ls_old == IB_PORT_INIT) {
1330 		if (start_of_sm_config) {
1331 			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1332 				ppd->is_sm_config_started = 1;
1333 		} else if (ls_new == IB_PORT_ARMED) {
1334 			if (ppd->is_sm_config_started == 0)
1335 				invalid = 1;
1336 		}
1337 	}
1338 
1339 	/* Handle the CLIENT_REREGISTER event because the SM asked us for it */
1340 	if (clientrereg) {
1341 		event.event = IB_EVENT_CLIENT_REREGISTER;
1342 		ib_dispatch_event(&event);
1343 	}
1344 
1345 	/*
1346 	 * Do the port state change now that the other link parameters
1347 	 * have been set.
1348 	 * Changing the port physical state only makes sense if the link
1349 	 * is down or is being set to down.
1350 	 */
1351 
1352 	ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
1353 	if (ret)
1354 		return ret;
1355 
1356 	ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
1357 
1358 	/* restore re-reg bit per o14-12.2.1 */
1359 	pi->clientrereg_subnettimeout |= clientrereg;
1360 
1361 	/*
1362 	 * Apply the new link downgrade policy.  This may result in a link
1363 	 * bounce.  Do this after everything else so things are settled.
1364 	 * Possible problem: if setting the port state above fails, then
1365 	 * the policy change is not applied.
1366 	 */
1367 	if (call_link_downgrade_policy)
1368 		apply_link_downgrade_policy(ppd, 0);
1369 
1370 	return ret;
1371 
1372 get_only:
1373 	return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
1374 }
1375 
1376 /**
1377  * set_pkeys - set the PKEY table for ctxt 0
1378  * @dd: the hfi1_ib device
1379  * @port: the IB port number
1380  * @pkeys: the PKEY table
1381  */
1382 static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
1383 {
1384 	struct hfi1_pportdata *ppd;
1385 	int i;
1386 	int changed = 0;
1387 	int update_includes_mgmt_partition = 0;
1388 
1389 	/*
1390 	 * IB ports one/two always map to contexts zero/one, which are
1391 	 * always kernel contexts, so no locking is needed.
1392 	 * If we get here with ppd set up, there is no need to check
1393 	 * that rcd is valid.
1394 	 */
1395 	ppd = dd->pport + (port - 1);
1396 	/*
1397 	 * If the update does not include the management pkey, don't do it.
1398 	 */
1399 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1400 		if (pkeys[i] == LIM_MGMT_P_KEY) {
1401 			update_includes_mgmt_partition = 1;
1402 			break;
1403 		}
1404 	}
1405 
1406 	if (!update_includes_mgmt_partition)
1407 		return 1;
1408 
1409 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1410 		u16 key = pkeys[i];
1411 		u16 okey = ppd->pkeys[i];
1412 
1413 		if (key == okey)
1414 			continue;
1415 		/*
1416 		 * Don't update pkeys[2] when the neighbor is a switch and it
1417 		 * has not granted MgmtAllowed to this HFI port.
1418 		 */
1419 		if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1)
1420 			continue;
1421 		/*
1422 		 * The SM gives us the complete PKey table. We have
1423 		 * to ensure that we put the PKeys in the matching
1424 		 * slots.
1425 		 */
1426 		ppd->pkeys[i] = key;
1427 		changed = 1;
1428 	}
1429 
1430 	if (changed) {
1431 		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
1432 		hfi1_event_pkey_change(dd, port);
1433 	}
1434 
1435 	return 0;
1436 }
1437 
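/*
 * OPA Set(PartitionTable): byte-swap the incoming pkey blocks in place and,
 * for block 0, program the hardware partition table via set_pkeys().
 */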
1438 static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
1439 				    struct ib_device *ibdev, u8 port,
1440 				    u32 *resp_len)
1441 {
1442 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1443 	u32 n_blocks_sent = OPA_AM_NBLK(am);
1444 	u32 start_block = am & 0x7ff;
1445 	u16 *p = (u16 *)data;
1446 	__be16 *q = (__be16 *)data;
1447 	int i;
1448 	u16 n_blocks_avail;
1449 	unsigned npkeys = hfi1_get_npkeys(dd);
1450 
1451 	if (n_blocks_sent == 0) {
1452 		pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
1453 			port, start_block, n_blocks_sent);
1454 		smp->status |= IB_SMP_INVALID_FIELD;
1455 		return reply((struct ib_mad_hdr *)smp);
1456 	}
1457 
1458 	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
1459 
1460 	if (start_block + n_blocks_sent > n_blocks_avail ||
1461 	    n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
1462 		pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
1463 			start_block, n_blocks_sent, n_blocks_avail,
1464 			OPA_NUM_PKEY_BLOCKS_PER_SMP);
1465 		smp->status |= IB_SMP_INVALID_FIELD;
1466 		return reply((struct ib_mad_hdr *)smp);
1467 	}
1468 
1469 	for (i = 0; i < n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE; i++)
1470 		p[i] = be16_to_cpu(q[i]);
1471 
1472 	if (start_block == 0 && set_pkeys(dd, port, p) != 0) {
1473 		smp->status |= IB_SMP_INVALID_FIELD;
1474 		return reply((struct ib_mad_hdr *)smp);
1475 	}
1476 
1477 	return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len);
1478 }
1479 
1480 static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1481 {
1482 	u64 *val = data;
1483 
1484 	*val++ = read_csr(dd, SEND_SC2VLT0);
1485 	*val++ = read_csr(dd, SEND_SC2VLT1);
1486 	*val++ = read_csr(dd, SEND_SC2VLT2);
1487 	*val++ = read_csr(dd, SEND_SC2VLT3);
1488 	return 0;
1489 }
1490 
1491 #define ILLEGAL_VL 12
1492 /*
1493  * filter_sc2vlt changes mappings to VL15 to ILLEGAL_VL (except
1494  * for SC15, which must map to VL15). If we don't remap things this
1495  * way it is possible for VL15 counters to increment when we try to
1496  * send on a SC which is mapped to an invalid VL.
1497  */
1498 static void filter_sc2vlt(void *data)
1499 {
1500 	int i;
1501 	u8 *pd = data;
1502 
1503 	for (i = 0; i < OPA_MAX_SCS; i++) {
1504 		if (i == 15)
1505 			continue;
1506 		if ((pd[i] & 0x1f) == 0xf)
1507 			pd[i] = ILLEGAL_VL;
1508 	}
1509 }
1510 
1511 static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1512 {
1513 	u64 *val = data;
1514 
1515 	filter_sc2vlt(data);
1516 
1517 	write_csr(dd, SEND_SC2VLT0, *val++);
1518 	write_csr(dd, SEND_SC2VLT1, *val++);
1519 	write_csr(dd, SEND_SC2VLT2, *val++);
1520 	write_csr(dd, SEND_SC2VLT3, *val++);
1521 	write_seqlock_irq(&dd->sc2vl_lock);
1522 	memcpy(dd->sc2vl, data, sizeof(dd->sc2vl));
1523 	write_sequnlock_irq(&dd->sc2vl_lock);
1524 	return 0;
1525 }
1526 
1527 static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1528 				   struct ib_device *ibdev, u8 port,
1529 				   u32 *resp_len)
1530 {
1531 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1532 	u8 *p = data;
1533 	size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
1534 	unsigned i;
1535 
1536 	if (am) {
1537 		smp->status |= IB_SMP_INVALID_FIELD;
1538 		return reply((struct ib_mad_hdr *)smp);
1539 	}
1540 
1541 	for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++)
1542 		*p++ = ibp->sl_to_sc[i];
1543 
1544 	if (resp_len)
1545 		*resp_len += size;
1546 
1547 	return reply((struct ib_mad_hdr *)smp);
1548 }
1549 
1550 static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1551 				   struct ib_device *ibdev, u8 port,
1552 				   u32 *resp_len)
1553 {
1554 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1555 	u8 *p = data;
1556 	int i;
1557 	u8 sc;
1558 
1559 	if (am) {
1560 		smp->status |= IB_SMP_INVALID_FIELD;
1561 		return reply((struct ib_mad_hdr *)smp);
1562 	}
1563 
1564 	for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++) {
1565 		sc = *p++;
1566 		if (ibp->sl_to_sc[i] != sc) {
1567 			ibp->sl_to_sc[i] = sc;
1568 
1569 			/* Put all stale qps into error state */
1570 			hfi1_error_port_qps(ibp, i);
1571 		}
1572 	}
1573 
1574 	return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len);
1575 }
1576 
1577 static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1578 				   struct ib_device *ibdev, u8 port,
1579 				   u32 *resp_len)
1580 {
1581 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1582 	u8 *p = data;
1583 	size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
1584 	unsigned i;
1585 
1586 	if (am) {
1587 		smp->status |= IB_SMP_INVALID_FIELD;
1588 		return reply((struct ib_mad_hdr *)smp);
1589 	}
1590 
1591 	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1592 		*p++ = ibp->sc_to_sl[i];
1593 
1594 	if (resp_len)
1595 		*resp_len += size;
1596 
1597 	return reply((struct ib_mad_hdr *)smp);
1598 }
1599 
1600 static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1601 				   struct ib_device *ibdev, u8 port,
1602 				   u32 *resp_len)
1603 {
1604 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1605 	u8 *p = data;
1606 	int i;
1607 
1608 	if (am) {
1609 		smp->status |= IB_SMP_INVALID_FIELD;
1610 		return reply((struct ib_mad_hdr *)smp);
1611 	}
1612 
1613 	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1614 		ibp->sc_to_sl[i] = *p++;
1615 
1616 	return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len);
1617 }
1618 
1619 static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1620 				    struct ib_device *ibdev, u8 port,
1621 				    u32 *resp_len)
1622 {
1623 	u32 n_blocks = OPA_AM_NBLK(am);
1624 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1625 	void *vp = (void *)data;
1626 	size_t size = 4 * sizeof(u64);
1627 
1628 	if (n_blocks != 1) {
1629 		smp->status |= IB_SMP_INVALID_FIELD;
1630 		return reply((struct ib_mad_hdr *)smp);
1631 	}
1632 
1633 	get_sc2vlt_tables(dd, vp);
1634 
1635 	if (resp_len)
1636 		*resp_len += size;
1637 
1638 	return reply((struct ib_mad_hdr *)smp);
1639 }
1640 
1641 static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1642 				    struct ib_device *ibdev, u8 port,
1643 				    u32 *resp_len)
1644 {
1645 	u32 n_blocks = OPA_AM_NBLK(am);
1646 	int async_update = OPA_AM_ASYNC(am);
1647 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1648 	void *vp = (void *)data;
1649 	struct hfi1_pportdata *ppd;
1650 	int lstate;
1651 
1652 	if (n_blocks != 1 || async_update) {
1653 		smp->status |= IB_SMP_INVALID_FIELD;
1654 		return reply((struct ib_mad_hdr *)smp);
1655 	}
1656 
1657 	/* IB numbers ports from 1, hw from 0 */
1658 	ppd = dd->pport + (port - 1);
1659 	lstate = driver_lstate(ppd);
1660 	/*
1661 	 * it's known that async_update is 0 by this point, but include
1662 	 * the explicit check for clarity
1663 	 */
1664 	if (!async_update &&
1665 	    (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
1666 		smp->status |= IB_SMP_INVALID_FIELD;
1667 		return reply((struct ib_mad_hdr *)smp);
1668 	}
1669 
1670 	set_sc2vlt_tables(dd, vp);
1671 
1672 	return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len);
1673 }
1674 
1675 static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1676 				     struct ib_device *ibdev, u8 port,
1677 				     u32 *resp_len)
1678 {
1679 	u32 n_blocks = OPA_AM_NPORT(am);
1680 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1681 	struct hfi1_pportdata *ppd;
1682 	void *vp = (void *)data;
1683 	int size;
1684 
1685 	if (n_blocks != 1) {
1686 		smp->status |= IB_SMP_INVALID_FIELD;
1687 		return reply((struct ib_mad_hdr *)smp);
1688 	}
1689 
1690 	ppd = dd->pport + (port - 1);
1691 
1692 	size = fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
1693 
1694 	if (resp_len)
1695 		*resp_len += size;
1696 
1697 	return reply((struct ib_mad_hdr *)smp);
1698 }
1699 
1700 static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1701 				     struct ib_device *ibdev, u8 port,
1702 				     u32 *resp_len)
1703 {
1704 	u32 n_blocks = OPA_AM_NPORT(am);
1705 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1706 	struct hfi1_pportdata *ppd;
1707 	void *vp = (void *)data;
1708 	int lstate;
1709 
1710 	if (n_blocks != 1) {
1711 		smp->status |= IB_SMP_INVALID_FIELD;
1712 		return reply((struct ib_mad_hdr *)smp);
1713 	}
1714 
1715 	/* IB numbers ports from 1, hw from 0 */
1716 	ppd = dd->pport + (port - 1);
1717 	lstate = driver_lstate(ppd);
1718 	if (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE) {
1719 		smp->status |= IB_SMP_INVALID_FIELD;
1720 		return reply((struct ib_mad_hdr *)smp);
1721 	}
1722 
1723 	ppd = dd->pport + (port - 1);
1724 
1725 	fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
1726 
1727 	return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
1728 					 resp_len);
1729 }
1730 
1731 static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
1732 			      struct ib_device *ibdev, u8 port,
1733 			      u32 *resp_len)
1734 {
1735 	u32 nports = OPA_AM_NPORT(am);
1736 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1737 	u32 lstate;
1738 	struct hfi1_ibport *ibp;
1739 	struct hfi1_pportdata *ppd;
1740 	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
1741 
1742 	if (nports != 1) {
1743 		smp->status |= IB_SMP_INVALID_FIELD;
1744 		return reply((struct ib_mad_hdr *)smp);
1745 	}
1746 
1747 	ibp = to_iport(ibdev, port);
1748 	ppd = ppd_from_ibp(ibp);
1749 
1750 	lstate = driver_lstate(ppd);
1751 
1752 	if (start_of_sm_config && (lstate == IB_PORT_INIT))
1753 		ppd->is_sm_config_started = 1;
1754 
1755 #if PI_LED_ENABLE_SUP
1756 	psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
1757 	psi->port_states.ledenable_offlinereason |=
1758 		ppd->is_sm_config_started << 5;
1759 	psi->port_states.ledenable_offlinereason |=
1760 		ppd->offline_disabled_reason;
1761 #else
1762 	psi->port_states.offline_reason = ppd->neighbor_normal << 4;
1763 	psi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
1764 	psi->port_states.offline_reason |= ppd->offline_disabled_reason;
1765 #endif /* PI_LED_ENABLE_SUP */
1766 
1767 	psi->port_states.portphysstate_portstate =
1768 		(hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf);
1769 	psi->link_width_downgrade_tx_active =
1770 		cpu_to_be16(ppd->link_width_downgrade_tx_active);
1771 	psi->link_width_downgrade_rx_active =
1772 		cpu_to_be16(ppd->link_width_downgrade_rx_active);
1773 	if (resp_len)
1774 		*resp_len += sizeof(struct opa_port_state_info);
1775 
1776 	return reply((struct ib_mad_hdr *)smp);
1777 }
1778 
1779 static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
1780 			      struct ib_device *ibdev, u8 port,
1781 			      u32 *resp_len)
1782 {
1783 	u32 nports = OPA_AM_NPORT(am);
1784 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1785 	u32 ls_old;
1786 	u8 ls_new, ps_new;
1787 	struct hfi1_ibport *ibp;
1788 	struct hfi1_pportdata *ppd;
1789 	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
1790 	int ret, invalid = 0;
1791 
1792 	if (nports != 1) {
1793 		smp->status |= IB_SMP_INVALID_FIELD;
1794 		return reply((struct ib_mad_hdr *)smp);
1795 	}
1796 
1797 	ibp = to_iport(ibdev, port);
1798 	ppd = ppd_from_ibp(ibp);
1799 
1800 	ls_old = driver_lstate(ppd);
1801 
1802 	ls_new = port_states_to_logical_state(&psi->port_states);
1803 	ps_new = port_states_to_phys_state(&psi->port_states);
1804 
1805 	if (ls_old == IB_PORT_INIT) {
1806 		if (start_of_sm_config) {
1807 			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1808 				ppd->is_sm_config_started = 1;
1809 		} else if (ls_new == IB_PORT_ARMED) {
1810 			if (ppd->is_sm_config_started == 0)
1811 				invalid = 1;
1812 		}
1813 	}
1814 
1815 	ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
1816 	if (ret)
1817 		return ret;
1818 
1819 	if (invalid)
1820 		smp->status |= IB_SMP_INVALID_FIELD;
1821 
1822 	return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len);
1823 }
1824 
1825 static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
1826 				     struct ib_device *ibdev, u8 port,
1827 				     u32 *resp_len)
1828 {
1829 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1830 	u32 addr = OPA_AM_CI_ADDR(am);
1831 	u32 len = OPA_AM_CI_LEN(am) + 1;
1832 	int ret;
1833 
1834 #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
1835 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
1836 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
1837 
1838 	/*
1839 	 * check that addr is within spec, and
1840 	 * addr and (addr + len - 1) are on the same "page"
1841 	 */
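	/*
	 * Editor's note: with __CI_PAGE_SIZE == 128, e.g. addr 0x80 with
	 * len 0x40 stays on the page starting at 0x80 and is accepted,
	 * while addr 0x7c with len 8 ends at 0x83, crossing from page 0x00
	 * into page 0x80, and is rejected.
	 */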
1842 	if (addr >= 4096 ||
1843 	    (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
1844 		smp->status |= IB_SMP_INVALID_FIELD;
1845 		return reply((struct ib_mad_hdr *)smp);
1846 	}
1847 
1848 	ret = get_cable_info(dd, port, addr, len, data);
1849 
1850 	if (ret == -ENODEV) {
1851 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
1852 		return reply((struct ib_mad_hdr *)smp);
1853 	}
1854 
1855 	/* The address range for the CableInfo SMA query is wider than the
1856 	 * memory available on the QSFP cable. We want to return a valid
1857 	 * response, albeit zeroed out, for address ranges beyond available
1858 	 * memory but that are within the CableInfo query spec
1859 	 */
1860 	if (ret < 0 && ret != -ERANGE) {
1861 		smp->status |= IB_SMP_INVALID_FIELD;
1862 		return reply((struct ib_mad_hdr *)smp);
1863 	}
1864 
1865 	if (resp_len)
1866 		*resp_len += len;
1867 
1868 	return reply((struct ib_mad_hdr *)smp);
1869 }
1870 
1871 static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
1872 			      struct ib_device *ibdev, u8 port, u32 *resp_len)
1873 {
1874 	u32 num_ports = OPA_AM_NPORT(am);
1875 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1876 	struct hfi1_pportdata *ppd;
1877 	struct buffer_control *p = (struct buffer_control *)data;
1878 	int size;
1879 
1880 	if (num_ports != 1) {
1881 		smp->status |= IB_SMP_INVALID_FIELD;
1882 		return reply((struct ib_mad_hdr *)smp);
1883 	}
1884 
1885 	ppd = dd->pport + (port - 1);
1886 	size = fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
1887 	trace_bct_get(dd, p);
1888 	if (resp_len)
1889 		*resp_len += size;
1890 
1891 	return reply((struct ib_mad_hdr *)smp);
1892 }
1893 
1894 static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
1895 			      struct ib_device *ibdev, u8 port, u32 *resp_len)
1896 {
1897 	u32 num_ports = OPA_AM_NPORT(am);
1898 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1899 	struct hfi1_pportdata *ppd;
1900 	struct buffer_control *p = (struct buffer_control *)data;
1901 
1902 	if (num_ports != 1) {
1903 		smp->status |= IB_SMP_INVALID_FIELD;
1904 		return reply((struct ib_mad_hdr *)smp);
1905 	}
1906 	ppd = dd->pport + (port - 1);
1907 	trace_bct_set(dd, p);
1908 	if (fm_set_table(ppd, FM_TBL_BUFFER_CONTROL, p) < 0) {
1909 		smp->status |= IB_SMP_INVALID_FIELD;
1910 		return reply((struct ib_mad_hdr *)smp);
1911 	}
1912 
1913 	return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len);
1914 }
1915 
1916 static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
1917 				 struct ib_device *ibdev, u8 port,
1918 				 u32 *resp_len)
1919 {
1920 	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
1921 	u32 num_ports = OPA_AM_NPORT(am);
1922 	u8 section = (am & 0x00ff0000) >> 16;
1923 	u8 *p = data;
1924 	int size = 0;
1925 
1926 	if (num_ports != 1) {
1927 		smp->status |= IB_SMP_INVALID_FIELD;
1928 		return reply((struct ib_mad_hdr *)smp);
1929 	}
1930 
1931 	switch (section) {
1932 	case OPA_VLARB_LOW_ELEMENTS:
1933 		size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
1934 		break;
1935 	case OPA_VLARB_HIGH_ELEMENTS:
1936 		size = fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
1937 		break;
1938 	case OPA_VLARB_PREEMPT_ELEMENTS:
1939 		size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
1940 		break;
1941 	case OPA_VLARB_PREEMPT_MATRIX:
1942 		size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
1943 		break;
1944 	default:
1945 		pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
1946 			be32_to_cpu(smp->attr_mod));
1947 		smp->status |= IB_SMP_INVALID_FIELD;
1948 		break;
1949 	}
1950 
1951 	if (size > 0 && resp_len)
1952 		*resp_len += size;
1953 
1954 	return reply((struct ib_mad_hdr *)smp);
1955 }
1956 
1957 static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
1958 				 struct ib_device *ibdev, u8 port,
1959 				 u32 *resp_len)
1960 {
1961 	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
1962 	u32 num_ports = OPA_AM_NPORT(am);
1963 	u8 section = (am & 0x00ff0000) >> 16;
1964 	u8 *p = data;
1965 
1966 	if (num_ports != 1) {
1967 		smp->status |= IB_SMP_INVALID_FIELD;
1968 		return reply((struct ib_mad_hdr *)smp);
1969 	}
1970 
1971 	switch (section) {
1972 	case OPA_VLARB_LOW_ELEMENTS:
1973 		(void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
1974 		break;
1975 	case OPA_VLARB_HIGH_ELEMENTS:
1976 		(void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
1977 		break;
1978 	/*
1979 	 * neither OPA_VLARB_PREEMPT_ELEMENTS nor OPA_VLARB_PREEMPT_MATRIX
1980 	 * can be changed from the default values
1981 	 */
1982 	case OPA_VLARB_PREEMPT_ELEMENTS:
1983 		/* FALLTHROUGH */
1984 	case OPA_VLARB_PREEMPT_MATRIX:
1985 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
1986 		break;
1987 	default:
1988 		pr_warn("OPA SubnSet(VL Arb) AM Invalid : 0x%x\n",
1989 			be32_to_cpu(smp->attr_mod));
1990 		smp->status |= IB_SMP_INVALID_FIELD;
1991 		break;
1992 	}
1993 
1994 	return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len);
1995 }
1996 
1997 struct opa_pma_mad {
1998 	struct ib_mad_hdr mad_hdr;
1999 	u8 data[2024];
2000 } __packed;
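/*
 * Editor's note: assuming the standard 24-byte ib_mad_hdr, the 2024 data
 * bytes here give the 2048-byte jumbo MAD size used for OPA PMA packets.
 */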
2001 
2002 struct opa_class_port_info {
2003 	u8 base_version;
2004 	u8 class_version;
2005 	__be16 cap_mask;
2006 	__be32 cap_mask2_resp_time;
2007 
2008 	u8 redirect_gid[16];
2009 	__be32 redirect_tc_fl;
2010 	__be32 redirect_lid;
2011 	__be32 redirect_sl_qp;
2012 	__be32 redirect_qkey;
2013 
2014 	u8 trap_gid[16];
2015 	__be32 trap_tc_fl;
2016 	__be32 trap_lid;
2017 	__be32 trap_hl_qp;
2018 	__be32 trap_qkey;
2019 
2020 	__be16 trap_pkey;
2021 	__be16 redirect_pkey;
2022 
2023 	u8 trap_sl_rsvd;
2024 	u8 reserved[3];
2025 } __packed;
2026 
2027 struct opa_port_status_req {
2028 	__u8 port_num;
2029 	__u8 reserved[3];
2030 	__be32 vl_select_mask;
2031 };
2032 
2033 #define VL_MASK_ALL		0x000080ff
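/*
 * Editor's note: 0x000080ff sets bits 0-7 and bit 15, i.e. data VLs 0-7
 * plus VL15, matching the per-VL loops below that walk the set bits of a
 * vl_select_mask.
 */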
2034 
2035 struct opa_port_status_rsp {
2036 	__u8 port_num;
2037 	__u8 reserved[3];
2038 	__be32  vl_select_mask;
2039 
2040 	/* Data counters */
2041 	__be64 port_xmit_data;
2042 	__be64 port_rcv_data;
2043 	__be64 port_xmit_pkts;
2044 	__be64 port_rcv_pkts;
2045 	__be64 port_multicast_xmit_pkts;
2046 	__be64 port_multicast_rcv_pkts;
2047 	__be64 port_xmit_wait;
2048 	__be64 sw_port_congestion;
2049 	__be64 port_rcv_fecn;
2050 	__be64 port_rcv_becn;
2051 	__be64 port_xmit_time_cong;
2052 	__be64 port_xmit_wasted_bw;
2053 	__be64 port_xmit_wait_data;
2054 	__be64 port_rcv_bubble;
2055 	__be64 port_mark_fecn;
2056 	/* Error counters */
2057 	__be64 port_rcv_constraint_errors;
2058 	__be64 port_rcv_switch_relay_errors;
2059 	__be64 port_xmit_discards;
2060 	__be64 port_xmit_constraint_errors;
2061 	__be64 port_rcv_remote_physical_errors;
2062 	__be64 local_link_integrity_errors;
2063 	__be64 port_rcv_errors;
2064 	__be64 excessive_buffer_overruns;
2065 	__be64 fm_config_errors;
2066 	__be32 link_error_recovery;
2067 	__be32 link_downed;
2068 	u8 uncorrectable_errors;
2069 
2070 	u8 link_quality_indicator; /* 5res, 3bit */
2071 	u8 res2[6];
2072 	struct _vls_pctrs {
2073 		/* per-VL Data counters */
2074 		__be64 port_vl_xmit_data;
2075 		__be64 port_vl_rcv_data;
2076 		__be64 port_vl_xmit_pkts;
2077 		__be64 port_vl_rcv_pkts;
2078 		__be64 port_vl_xmit_wait;
2079 		__be64 sw_port_vl_congestion;
2080 		__be64 port_vl_rcv_fecn;
2081 		__be64 port_vl_rcv_becn;
2082 		__be64 port_xmit_time_cong;
2083 		__be64 port_vl_xmit_wasted_bw;
2084 		__be64 port_vl_xmit_wait_data;
2085 		__be64 port_vl_rcv_bubble;
2086 		__be64 port_vl_mark_fecn;
2087 		__be64 port_vl_xmit_discards;
2088 	} vls[0]; /* real array size defined by # bits set in vl_select_mask */
2089 };
2090 
2091 enum counter_selects {
2092 	CS_PORT_XMIT_DATA			= (1 << 31),
2093 	CS_PORT_RCV_DATA			= (1 << 30),
2094 	CS_PORT_XMIT_PKTS			= (1 << 29),
2095 	CS_PORT_RCV_PKTS			= (1 << 28),
2096 	CS_PORT_MCAST_XMIT_PKTS			= (1 << 27),
2097 	CS_PORT_MCAST_RCV_PKTS			= (1 << 26),
2098 	CS_PORT_XMIT_WAIT			= (1 << 25),
2099 	CS_SW_PORT_CONGESTION			= (1 << 24),
2100 	CS_PORT_RCV_FECN			= (1 << 23),
2101 	CS_PORT_RCV_BECN			= (1 << 22),
2102 	CS_PORT_XMIT_TIME_CONG			= (1 << 21),
2103 	CS_PORT_XMIT_WASTED_BW			= (1 << 20),
2104 	CS_PORT_XMIT_WAIT_DATA			= (1 << 19),
2105 	CS_PORT_RCV_BUBBLE			= (1 << 18),
2106 	CS_PORT_MARK_FECN			= (1 << 17),
2107 	CS_PORT_RCV_CONSTRAINT_ERRORS		= (1 << 16),
2108 	CS_PORT_RCV_SWITCH_RELAY_ERRORS		= (1 << 15),
2109 	CS_PORT_XMIT_DISCARDS			= (1 << 14),
2110 	CS_PORT_XMIT_CONSTRAINT_ERRORS		= (1 << 13),
2111 	CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS	= (1 << 12),
2112 	CS_LOCAL_LINK_INTEGRITY_ERRORS		= (1 << 11),
2113 	CS_PORT_RCV_ERRORS			= (1 << 10),
2114 	CS_EXCESSIVE_BUFFER_OVERRUNS		= (1 << 9),
2115 	CS_FM_CONFIG_ERRORS			= (1 << 8),
2116 	CS_LINK_ERROR_RECOVERY			= (1 << 7),
2117 	CS_LINK_DOWNED				= (1 << 6),
2118 	CS_UNCORRECTABLE_ERRORS			= (1 << 5),
2119 };
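/*
 * Editor's note: these values mirror the CounterSelectMask bits consumed by
 * pma_set_opa_portstatus() below, where each set bit requests that the
 * corresponding counter be cleared.
 */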
2120 
2121 struct opa_clear_port_status {
2122 	__be64 port_select_mask[4];
2123 	__be32 counter_select_mask;
2124 };
2125 
2126 struct opa_aggregate {
2127 	__be16 attr_id;
2128 	__be16 err_reqlength;	/* 1 bit, 8 res, 7 bit */
2129 	__be32 attr_mod;
2130 	u8 data[0];
2131 };
2132 
2133 #define MSK_LLI 0x000000f0
2134 #define MSK_LLI_SFT 4
2135 #define MSK_LER 0x0000000f
2136 #define MSK_LER_SFT 0
2137 #define ADD_LLI 8
2138 #define ADD_LER 2
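/*
 * Editor's note: in the resolution word below, bits 7:4 (MSK_LLI) select the
 * LocalLinkIntegrity resolution and bits 3:0 (MSK_LER) the LinkErrorRecovery
 * resolution.  A non-zero nibble n shifts the summed counter right by
 * n + ADD_LLI (or n + ADD_LER); e.g. a resolution of 0x30 shifts the LLI sum
 * right by 3 + 8 = 11 bits.
 */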
2139 
2140 /* Request contains first three fields, response contains those plus the rest */
2141 struct opa_port_data_counters_msg {
2142 	__be64 port_select_mask[4];
2143 	__be32 vl_select_mask;
2144 	__be32 resolution;
2145 
2146 	/* Response fields follow */
2147 	struct _port_dctrs {
2148 		u8 port_number;
2149 		u8 reserved2[3];
2150 		__be32 link_quality_indicator; /* 29res, 3bit */
2151 
2152 		/* Data counters */
2153 		__be64 port_xmit_data;
2154 		__be64 port_rcv_data;
2155 		__be64 port_xmit_pkts;
2156 		__be64 port_rcv_pkts;
2157 		__be64 port_multicast_xmit_pkts;
2158 		__be64 port_multicast_rcv_pkts;
2159 		__be64 port_xmit_wait;
2160 		__be64 sw_port_congestion;
2161 		__be64 port_rcv_fecn;
2162 		__be64 port_rcv_becn;
2163 		__be64 port_xmit_time_cong;
2164 		__be64 port_xmit_wasted_bw;
2165 		__be64 port_xmit_wait_data;
2166 		__be64 port_rcv_bubble;
2167 		__be64 port_mark_fecn;
2168 
2169 		__be64 port_error_counter_summary;
2170 		/* Sum of error counts/port */
2171 
2172 		struct _vls_dctrs {
2173 			/* per-VL Data counters */
2174 			__be64 port_vl_xmit_data;
2175 			__be64 port_vl_rcv_data;
2176 			__be64 port_vl_xmit_pkts;
2177 			__be64 port_vl_rcv_pkts;
2178 			__be64 port_vl_xmit_wait;
2179 			__be64 sw_port_vl_congestion;
2180 			__be64 port_vl_rcv_fecn;
2181 			__be64 port_vl_rcv_becn;
2182 			__be64 port_xmit_time_cong;
2183 			__be64 port_vl_xmit_wasted_bw;
2184 			__be64 port_vl_xmit_wait_data;
2185 			__be64 port_vl_rcv_bubble;
2186 			__be64 port_vl_mark_fecn;
2187 		} vls[0];
2188 		/* array size defined by #bits set in vl_select_mask*/
2189 		/* array size defined by #bits set in vl_select_mask */
2190 	} port[1]; /* array size defined by #ports in attribute modifier */
2191 
2192 struct opa_port_error_counters64_msg {
2193 	/*
2194 	 * Request contains first two fields, response contains the
2195 	 * whole structure
2196 	 */
2197 	__be64 port_select_mask[4];
2198 	__be32 vl_select_mask;
2199 
2200 	/* Response-only fields follow */
2201 	__be32 reserved1;
2202 	struct _port_ectrs {
2203 		u8 port_number;
2204 		u8 reserved2[7];
2205 		__be64 port_rcv_constraint_errors;
2206 		__be64 port_rcv_switch_relay_errors;
2207 		__be64 port_xmit_discards;
2208 		__be64 port_xmit_constraint_errors;
2209 		__be64 port_rcv_remote_physical_errors;
2210 		__be64 local_link_integrity_errors;
2211 		__be64 port_rcv_errors;
2212 		__be64 excessive_buffer_overruns;
2213 		__be64 fm_config_errors;
2214 		__be32 link_error_recovery;
2215 		__be32 link_downed;
2216 		u8 uncorrectable_errors;
2217 		u8 reserved3[7];
2218 		struct _vls_ectrs {
2219 			__be64 port_vl_xmit_discards;
2220 		} vls[0];
2221 		/* array size defined by #bits set in vl_select_mask */
2222 	} port[1]; /* array size defined by #ports in attribute modifier */
2223 };
2224 
2225 struct opa_port_error_info_msg {
2226 	__be64 port_select_mask[4];
2227 	__be32 error_info_select_mask;
2228 	__be32 reserved1;
2229 	struct _port_ei {
2230 		u8 port_number;
2231 		u8 reserved2[7];
2232 
2233 		/* PortRcvErrorInfo */
2234 		struct {
2235 			u8 status_and_code;
2236 			union {
2237 				u8 raw[17];
2238 				struct {
2239 					/* EI1to12 format */
2240 					u8 packet_flit1[8];
2241 					u8 packet_flit2[8];
2242 					u8 remaining_flit_bits12;
2243 				} ei1to12;
2244 				struct {
2245 					u8 packet_bytes[8];
2246 					u8 remaining_flit_bits;
2247 				} ei13;
2248 			} ei;
2249 			u8 reserved3[6];
2250 		} __packed port_rcv_ei;
2251 
2252 		/* ExcessiveBufferOverrunInfo */
2253 		struct {
2254 			u8 status_and_sc;
2255 			u8 reserved4[7];
2256 		} __packed excessive_buffer_overrun_ei;
2257 
2258 		/* PortXmitConstraintErrorInfo */
2259 		struct {
2260 			u8 status;
2261 			u8 reserved5;
2262 			__be16 pkey;
2263 			__be32 slid;
2264 		} __packed port_xmit_constraint_ei;
2265 
2266 		/* PortRcvConstraintErrorInfo */
2267 		struct {
2268 			u8 status;
2269 			u8 reserved6;
2270 			__be16 pkey;
2271 			__be32 slid;
2272 		} __packed port_rcv_constraint_ei;
2273 
2274 		/* PortRcvSwitchRelayErrorInfo */
2275 		struct {
2276 			u8 status_and_code;
2277 			u8 reserved7[3];
2278 			__u32 error_info;
2279 		} __packed port_rcv_switch_relay_ei;
2280 
2281 		/* UncorrectableErrorInfo */
2282 		struct {
2283 			u8 status_and_code;
2284 			u8 reserved8;
2285 		} __packed uncorrectable_ei;
2286 
2287 		/* FMConfigErrorInfo */
2288 		struct {
2289 			u8 status_and_code;
2290 			u8 error_info;
2291 		} __packed fm_config_ei;
2292 		__u32 reserved9;
2293 	} port[1]; /* actual array size defined by #ports in attr modifier */
2294 };
2295 
2296 /* opa_port_error_info_msg error_info_select_mask bit definitions */
2297 enum error_info_selects {
2298 	ES_PORT_RCV_ERROR_INFO			= (1 << 31),
2299 	ES_EXCESSIVE_BUFFER_OVERRUN_INFO	= (1 << 30),
2300 	ES_PORT_XMIT_CONSTRAINT_ERROR_INFO	= (1 << 29),
2301 	ES_PORT_RCV_CONSTRAINT_ERROR_INFO	= (1 << 28),
2302 	ES_PORT_RCV_SWITCH_RELAY_ERROR_INFO	= (1 << 27),
2303 	ES_UNCORRECTABLE_ERROR_INFO		= (1 << 26),
2304 	ES_FM_CONFIG_ERROR_INFO			= (1 << 25)
2305 };
2306 
2307 static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
2308 				     struct ib_device *ibdev, u32 *resp_len)
2309 {
2310 	struct opa_class_port_info *p =
2311 		(struct opa_class_port_info *)pmp->data;
2312 
2313 	memset(pmp->data, 0, sizeof(pmp->data));
2314 
2315 	if (pmp->mad_hdr.attr_mod != 0)
2316 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2317 
2318 	p->base_version = OPA_MGMT_BASE_VERSION;
2319 	p->class_version = OPA_SMI_CLASS_VERSION;
2320 	/*
2321 	 * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
2322 	 */
2323 	p->cap_mask2_resp_time = cpu_to_be32(18);
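	/*
	 * Editor's note: assuming the usual ClassPortInfo layout, the low
	 * 5 bits of cap_mask2_resp_time hold RespTimeValue, so writing 18
	 * here leaves CapabilityMask2 (the upper 27 bits) cleared.
	 */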
2324 
2325 	if (resp_len)
2326 		*resp_len += sizeof(*p);
2327 
2328 	return reply((struct ib_mad_hdr *)pmp);
2329 }
2330 
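/*
 * Editor's note: on pre-B-step silicon (is_bx() returns false) the helpers
 * below cap the reported PortXmitWait at the sum of the per-VL xmit-wait
 * counters, saturating that sum at ~0 on 64-bit wrap, presumably to keep the
 * port total consistent with the per-VL values on that hardware.
 */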
2331 static void a0_portstatus(struct hfi1_pportdata *ppd,
2332 			  struct opa_port_status_rsp *rsp, u32 vl_select_mask)
2333 {
2334 	if (!is_bx(ppd->dd)) {
2335 		unsigned long vl;
2336 		u64 sum_vl_xmit_wait = 0;
2337 		u32 vl_all_mask = VL_MASK_ALL;
2338 
2339 		for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
2340 				 8 * sizeof(vl_all_mask)) {
2341 			u64 tmp = sum_vl_xmit_wait +
2342 				  read_port_cntr(ppd, C_TX_WAIT_VL,
2343 						 idx_from_vl(vl));
2344 			if (tmp < sum_vl_xmit_wait) {
2345 				/* we wrapped */
2346 				sum_vl_xmit_wait = (u64)~0;
2347 				break;
2348 			}
2349 			sum_vl_xmit_wait = tmp;
2350 		}
2351 		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2352 			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2353 	}
2354 }
2355 
2356 static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
2357 				  struct ib_device *ibdev,
2358 				  u8 port, u32 *resp_len)
2359 {
2360 	struct opa_port_status_req *req =
2361 		(struct opa_port_status_req *)pmp->data;
2362 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2363 	struct opa_port_status_rsp *rsp;
2364 	u32 vl_select_mask = be32_to_cpu(req->vl_select_mask);
2365 	unsigned long vl;
2366 	size_t response_data_size;
2367 	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2368 	u8 port_num = req->port_num;
2369 	u8 num_vls = hweight32(vl_select_mask);
2370 	struct _vls_pctrs *vlinfo;
2371 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2372 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2373 	int vfi;
2374 	u64 tmp, tmp2;
2375 
2376 	response_data_size = sizeof(struct opa_port_status_rsp) +
2377 				num_vls * sizeof(struct _vls_pctrs);
2378 	if (response_data_size > sizeof(pmp->data)) {
2379 		pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE;
2380 		return reply((struct ib_mad_hdr *)pmp);
2381 	}
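	/*
	 * Editor's note: sizeof(pmp->data) is the 2024-byte payload of
	 * struct opa_pma_mad, so this rejects requests whose variable-length
	 * per-VL array would not fit in a single MAD.
	 */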
2382 
2383 	if (nports != 1 || (port_num && port_num != port) ||
2384 	    num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
2385 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2386 		return reply((struct ib_mad_hdr *)pmp);
2387 	}
2388 
2389 	memset(pmp->data, 0, sizeof(pmp->data));
2390 
2391 	rsp = (struct opa_port_status_rsp *)pmp->data;
2392 	if (port_num)
2393 		rsp->port_num = port_num;
2394 	else
2395 		rsp->port_num = port;
2396 
2397 	rsp->port_rcv_constraint_errors =
2398 		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2399 					   CNTR_INVALID_VL));
2400 
2401 	hfi1_read_link_quality(dd, &rsp->link_quality_indicator);
2402 
2403 	rsp->vl_select_mask = cpu_to_be32(vl_select_mask);
2404 	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2405 					  CNTR_INVALID_VL));
2406 	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2407 					 CNTR_INVALID_VL));
2408 	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2409 					  CNTR_INVALID_VL));
2410 	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2411 					 CNTR_INVALID_VL));
2412 	rsp->port_multicast_xmit_pkts =
2413 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2414 					  CNTR_INVALID_VL));
2415 	rsp->port_multicast_rcv_pkts =
2416 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2417 					  CNTR_INVALID_VL));
2418 	rsp->port_xmit_wait =
2419 		cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2420 	rsp->port_rcv_fecn =
2421 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2422 	rsp->port_rcv_becn =
2423 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2424 	rsp->port_xmit_discards =
2425 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2426 					   CNTR_INVALID_VL));
2427 	rsp->port_xmit_constraint_errors =
2428 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2429 					   CNTR_INVALID_VL));
2430 	rsp->port_rcv_remote_physical_errors =
2431 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2432 					  CNTR_INVALID_VL));
2433 	tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
2434 	tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
2435 	if (tmp2 < tmp) {
2436 		/* overflow/wrapped */
2437 		rsp->local_link_integrity_errors = cpu_to_be64(~0);
2438 	} else {
2439 		rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
2440 	}
2441 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2442 	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2443 				   CNTR_INVALID_VL);
2444 	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
2445 		/* overflow/wrapped */
2446 		rsp->link_error_recovery = cpu_to_be32(~0);
2447 	} else {
2448 		rsp->link_error_recovery = cpu_to_be32(tmp2);
2449 	}
2450 	rsp->port_rcv_errors =
2451 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2452 	rsp->excessive_buffer_overruns =
2453 		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2454 	rsp->fm_config_errors =
2455 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2456 					  CNTR_INVALID_VL));
2457 	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2458 						      CNTR_INVALID_VL));
2459 
2460 	/* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
2461 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2462 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2463 
2464 	vlinfo = &rsp->vls[0];
2465 	vfi = 0;
2466 	/* The vl_select_mask has been checked above, and we know
2467 	 * that it contains only entries which represent valid VLs.
2468 	 * So in the for_each_set_bit() loop below, we don't need
2469 	 * any additional checks for vl.
2470 	 */
2471 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2472 			 8 * sizeof(vl_select_mask)) {
2473 		memset(vlinfo, 0, sizeof(*vlinfo));
2474 
2475 		tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
2476 		rsp->vls[vfi].port_vl_rcv_data = cpu_to_be64(tmp);
2477 
2478 		rsp->vls[vfi].port_vl_rcv_pkts =
2479 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2480 						  idx_from_vl(vl)));
2481 
2482 		rsp->vls[vfi].port_vl_xmit_data =
2483 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2484 						   idx_from_vl(vl)));
2485 
2486 		rsp->vls[vfi].port_vl_xmit_pkts =
2487 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2488 						   idx_from_vl(vl)));
2489 
2490 		rsp->vls[vfi].port_vl_xmit_wait =
2491 			cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2492 						   idx_from_vl(vl)));
2493 
2494 		rsp->vls[vfi].port_vl_rcv_fecn =
2495 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2496 						  idx_from_vl(vl)));
2497 
2498 		rsp->vls[vfi].port_vl_rcv_becn =
2499 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2500 						  idx_from_vl(vl)));
2501 
2502 		vlinfo++;
2503 		vfi++;
2504 	}
2505 
2506 	a0_portstatus(ppd, rsp, vl_select_mask);
2507 
2508 	if (resp_len)
2509 		*resp_len += response_data_size;
2510 
2511 	return reply((struct ib_mad_hdr *)pmp);
2512 }
2513 
2514 static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
2515 				     u8 res_lli, u8 res_ler)
2516 {
2517 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2518 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2519 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2520 	u64 error_counter_summary = 0, tmp;
2521 
2522 	error_counter_summary += read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2523 						CNTR_INVALID_VL);
2524 	/* port_rcv_switch_relay_errors is 0 for HFIs */
2525 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_DSCD,
2526 						CNTR_INVALID_VL);
2527 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2528 						CNTR_INVALID_VL);
2529 	error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2530 					       CNTR_INVALID_VL);
2531 	/* local link integrity must be right-shifted by the lli resolution */
2532 	tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
2533 	tmp += read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
2534 	error_counter_summary += (tmp >> res_lli);
2535 	/* link error recovery must be right-shifted by the ler resolution */
2536 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2537 	tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
2538 	error_counter_summary += (tmp >> res_ler);
2539 	error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
2540 					       CNTR_INVALID_VL);
2541 	error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2542 	error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2543 					       CNTR_INVALID_VL);
2544 	/* ppd->link_downed is a 32-bit value */
2545 	error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
2546 						CNTR_INVALID_VL);
2547 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2548 	/* this is an 8-bit quantity */
2549 	error_counter_summary += tmp < 0x100 ? (tmp & 0xff) : 0xff;
2550 
2551 	return error_counter_summary;
2552 }
2553 
2554 static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
2555 			    u32 vl_select_mask)
2556 {
2557 	if (!is_bx(ppd->dd)) {
2558 		unsigned long vl;
2559 		u64 sum_vl_xmit_wait = 0;
2560 		u32 vl_all_mask = VL_MASK_ALL;
2561 
2562 		for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
2563 				 8 * sizeof(vl_all_mask)) {
2564 			u64 tmp = sum_vl_xmit_wait +
2565 				  read_port_cntr(ppd, C_TX_WAIT_VL,
2566 						 idx_from_vl(vl));
2567 			if (tmp < sum_vl_xmit_wait) {
2568 				/* we wrapped */
2569 				sum_vl_xmit_wait = (u64)~0;
2570 				break;
2571 			}
2572 			sum_vl_xmit_wait = tmp;
2573 		}
2574 		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2575 			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2576 	}
2577 }
2578 
2579 static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
2580 				   struct _port_dctrs *rsp)
2581 {
2582 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2583 
2584 	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2585 						CNTR_INVALID_VL));
2586 	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2587 						CNTR_INVALID_VL));
2588 	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2589 						CNTR_INVALID_VL));
2590 	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2591 						CNTR_INVALID_VL));
2592 	rsp->port_multicast_xmit_pkts =
2593 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2594 					  CNTR_INVALID_VL));
2595 	rsp->port_multicast_rcv_pkts =
2596 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2597 					  CNTR_INVALID_VL));
2598 }
2599 
2600 static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
2601 				    struct ib_device *ibdev,
2602 				    u8 port, u32 *resp_len)
2603 {
2604 	struct opa_port_data_counters_msg *req =
2605 		(struct opa_port_data_counters_msg *)pmp->data;
2606 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2607 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2608 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2609 	struct _port_dctrs *rsp;
2610 	struct _vls_dctrs *vlinfo;
2611 	size_t response_data_size;
2612 	u32 num_ports;
2613 	u8 num_pslm;
2614 	u8 lq, num_vls;
2615 	u8 res_lli, res_ler;
2616 	u64 port_mask;
2617 	unsigned long port_num;
2618 	unsigned long vl;
2619 	u32 vl_select_mask;
2620 	int vfi;
2621 
2622 	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2623 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2624 	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2625 	vl_select_mask = be32_to_cpu(req->vl_select_mask);
2626 	res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT;
2627 	res_lli = res_lli ? res_lli + ADD_LLI : 0;
2628 	res_ler = (u8)(be32_to_cpu(req->resolution) & MSK_LER) >> MSK_LER_SFT;
2629 	res_ler = res_ler ? res_ler + ADD_LER : 0;
2630 
2631 	if (num_ports != 1 || (vl_select_mask & ~VL_MASK_ALL)) {
2632 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2633 		return reply((struct ib_mad_hdr *)pmp);
2634 	}
2635 
2636 	/* Sanity check */
2637 	response_data_size = sizeof(struct opa_port_data_counters_msg) +
2638 				num_vls * sizeof(struct _vls_dctrs);
2639 
2640 	if (response_data_size > sizeof(pmp->data)) {
2641 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2642 		return reply((struct ib_mad_hdr *)pmp);
2643 	}
2644 
2645 	/*
2646 	 * The bit set in the mask needs to be consistent with the
2647 	 * port the request came in on.
2648 	 */
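	/*
	 * Editor's note: only word [3] of the 256-bit port_select_mask is
	 * examined here, and the index of its first set bit is taken as the
	 * requested port number, so a request for port 1 is expected to set
	 * bit 1 of port_select_mask[3].
	 */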
2649 	port_mask = be64_to_cpu(req->port_select_mask[3]);
2650 	port_num = find_first_bit((unsigned long *)&port_mask,
2651 				  sizeof(port_mask) * 8);
2652 
2653 	if ((u8)port_num != port) {
2654 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2655 		return reply((struct ib_mad_hdr *)pmp);
2656 	}
2657 
2658 	rsp = &req->port[0];
2659 	memset(rsp, 0, sizeof(*rsp));
2660 
2661 	rsp->port_number = port;
2662 	/*
2663 	 * Note that link_quality_indicator is a 32 bit quantity in
2664 	 * 'datacounters' queries (as opposed to 'portinfo' queries,
2665 	 * where it's a byte).
2666 	 */
2667 	hfi1_read_link_quality(dd, &lq);
2668 	rsp->link_quality_indicator = cpu_to_be32((u32)lq);
2669 	pma_get_opa_port_dctrs(ibdev, rsp);
2670 
2671 	rsp->port_xmit_wait =
2672 		cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2673 	rsp->port_rcv_fecn =
2674 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2675 	rsp->port_rcv_becn =
2676 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2677 	rsp->port_error_counter_summary =
2678 		cpu_to_be64(get_error_counter_summary(ibdev, port,
2679 						      res_lli, res_ler));
2680 
2681 	vlinfo = &rsp->vls[0];
2682 	vfi = 0;
2683 	/* The vl_select_mask has been checked above, and we know
2684 	 * that it contains only entries which represent valid VLs.
2685 	 * So in the for_each_set_bit() loop below, we don't need
2686 	 * any additional checks for vl.
2687 	 */
2688 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2689 			 8 * sizeof(req->vl_select_mask)) {
2690 		memset(vlinfo, 0, sizeof(*vlinfo));
2691 
2692 		rsp->vls[vfi].port_vl_xmit_data =
2693 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2694 						   idx_from_vl(vl)));
2695 
2696 		rsp->vls[vfi].port_vl_rcv_data =
2697 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
2698 						  idx_from_vl(vl)));
2699 
2700 		rsp->vls[vfi].port_vl_xmit_pkts =
2701 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2702 						   idx_from_vl(vl)));
2703 
2704 		rsp->vls[vfi].port_vl_rcv_pkts =
2705 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2706 						  idx_from_vl(vl)));
2707 
2708 		rsp->vls[vfi].port_vl_xmit_wait =
2709 			cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2710 						   idx_from_vl(vl)));
2711 
2712 		rsp->vls[vfi].port_vl_rcv_fecn =
2713 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2714 						  idx_from_vl(vl)));
2715 		rsp->vls[vfi].port_vl_rcv_becn =
2716 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2717 						  idx_from_vl(vl)));
2718 
2719 		/* rsp->port_vl_xmit_time_cong is 0 for HFIs */
2720 		/* rsp->port_vl_xmit_wasted_bw ??? */
2721 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
2722 		 * does this differ from rsp->vls[vfi].port_vl_xmit_wait
2723 		 */
2724 		/*rsp->vls[vfi].port_vl_mark_fecn =
2725 		 *	cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
2726 		 *		+ offset));
2727 		 */
2728 		vlinfo++;
2729 		vfi++;
2730 	}
2731 
2732 	a0_datacounters(ppd, rsp, vl_select_mask);
2733 
2734 	if (resp_len)
2735 		*resp_len += response_data_size;
2736 
2737 	return reply((struct ib_mad_hdr *)pmp);
2738 }
2739 
2740 static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
2741 				       struct ib_device *ibdev, u8 port)
2742 {
2743 	struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
2744 						pmp->data;
2745 	struct _port_dctrs rsp;
2746 
2747 	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
2748 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2749 		goto bail;
2750 	}
2751 
2752 	memset(&rsp, 0, sizeof(rsp));
2753 	pma_get_opa_port_dctrs(ibdev, &rsp);
2754 
2755 	p->port_xmit_data = rsp.port_xmit_data;
2756 	p->port_rcv_data = rsp.port_rcv_data;
2757 	p->port_xmit_packets = rsp.port_xmit_pkts;
2758 	p->port_rcv_packets = rsp.port_rcv_pkts;
2759 	p->port_unicast_xmit_packets = 0;
2760 	p->port_unicast_rcv_packets =  0;
2761 	p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
2762 	p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
2763 
2764 bail:
2765 	return reply((struct ib_mad_hdr *)pmp);
2766 }
2767 
2768 static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
2769 				   struct _port_ectrs *rsp, u8 port)
2770 {
2771 	u64 tmp, tmp2;
2772 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2773 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2774 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2775 
2776 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2777 	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2778 					CNTR_INVALID_VL);
2779 	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
2780 		/* overflow/wrapped */
2781 		rsp->link_error_recovery = cpu_to_be32(~0);
2782 	} else {
2783 		rsp->link_error_recovery = cpu_to_be32(tmp2);
2784 	}
2785 
2786 	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2787 						CNTR_INVALID_VL));
2788 	rsp->port_rcv_errors =
2789 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2790 	rsp->port_rcv_remote_physical_errors =
2791 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2792 					  CNTR_INVALID_VL));
2793 	rsp->port_rcv_switch_relay_errors = 0;
2794 	rsp->port_xmit_discards =
2795 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2796 					   CNTR_INVALID_VL));
2797 	rsp->port_xmit_constraint_errors =
2798 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2799 					   CNTR_INVALID_VL));
2800 	rsp->port_rcv_constraint_errors =
2801 		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2802 					   CNTR_INVALID_VL));
2803 	tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
2804 	tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
2805 	if (tmp2 < tmp) {
2806 		/* overflow/wrapped */
2807 		rsp->local_link_integrity_errors = cpu_to_be64(~0);
2808 	} else {
2809 		rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
2810 	}
2811 	rsp->excessive_buffer_overruns =
2812 		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2813 }
2814 
2815 static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
2816 				  struct ib_device *ibdev,
2817 				  u8 port, u32 *resp_len)
2818 {
2819 	size_t response_data_size;
2820 	struct _port_ectrs *rsp;
2821 	u8 port_num;
2822 	struct opa_port_error_counters64_msg *req;
2823 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2824 	u32 num_ports;
2825 	u8 num_pslm;
2826 	u8 num_vls;
2827 	struct hfi1_ibport *ibp;
2828 	struct hfi1_pportdata *ppd;
2829 	struct _vls_ectrs *vlinfo;
2830 	unsigned long vl;
2831 	u64 port_mask, tmp;
2832 	u32 vl_select_mask;
2833 	int vfi;
2834 
2835 	req = (struct opa_port_error_counters64_msg *)pmp->data;
2836 
2837 	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2838 
2839 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2840 	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2841 
2842 	if (num_ports != 1 || num_ports != num_pslm) {
2843 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2844 		return reply((struct ib_mad_hdr *)pmp);
2845 	}
2846 
2847 	response_data_size = sizeof(struct opa_port_error_counters64_msg) +
2848 				num_vls * sizeof(struct _vls_ectrs);
2849 
2850 	if (response_data_size > sizeof(pmp->data)) {
2851 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2852 		return reply((struct ib_mad_hdr *)pmp);
2853 	}
2854 	/*
2855 	 * The bit set in the mask needs to be consistent with the
2856 	 * port the request came in on.
2857 	 */
2858 	port_mask = be64_to_cpu(req->port_select_mask[3]);
2859 	port_num = find_first_bit((unsigned long *)&port_mask,
2860 				  sizeof(port_mask) * 8);
2861 
2862 	if (port_num != port) {
2863 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2864 		return reply((struct ib_mad_hdr *)pmp);
2865 	}
2866 
2867 	rsp = &req->port[0];
2868 
2869 	ibp = to_iport(ibdev, port_num);
2870 	ppd = ppd_from_ibp(ibp);
2871 
2872 	memset(rsp, 0, sizeof(*rsp));
2873 	rsp->port_number = port_num;
2874 
2875 	pma_get_opa_port_ectrs(ibdev, rsp, port_num);
2876 
2877 	rsp->port_rcv_remote_physical_errors =
2878 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2879 					  CNTR_INVALID_VL));
2880 	rsp->fm_config_errors =
2881 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2882 					  CNTR_INVALID_VL));
2883 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2884 
2885 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2886 
2887 	vlinfo = &rsp->vls[0];
2888 	vfi = 0;
2889 	vl_select_mask = be32_to_cpu(req->vl_select_mask);
2890 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2891 			 8 * sizeof(req->vl_select_mask)) {
2892 		memset(vlinfo, 0, sizeof(*vlinfo));
2893 		/* vlinfo->vls[vfi].port_vl_xmit_discards ??? */
2894 		vlinfo += 1;
2895 		vfi++;
2896 	}
2897 
2898 	if (resp_len)
2899 		*resp_len += response_data_size;
2900 
2901 	return reply((struct ib_mad_hdr *)pmp);
2902 }
2903 
2904 static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
2905 				   struct ib_device *ibdev, u8 port)
2906 {
2907 	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
2908 		pmp->data;
2909 	struct _port_ectrs rsp;
2910 	u64 temp_link_overrun_errors;
2911 	u64 temp_64;
2912 	u32 temp_32;
2913 
2914 	memset(&rsp, 0, sizeof(rsp));
2915 	pma_get_opa_port_ectrs(ibdev, &rsp, port);
2916 
2917 	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
2918 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2919 		goto bail;
2920 	}
2921 
2922 	p->symbol_error_counter = 0; /* N/A for OPA */
2923 
2924 	temp_32 = be32_to_cpu(rsp.link_error_recovery);
2925 	if (temp_32 > 0xFFUL)
2926 		p->link_error_recovery_counter = 0xFF;
2927 	else
2928 		p->link_error_recovery_counter = (u8)temp_32;
2929 
2930 	temp_32 = be32_to_cpu(rsp.link_downed);
2931 	if (temp_32 > 0xFFUL)
2932 		p->link_downed_counter = 0xFF;
2933 	else
2934 		p->link_downed_counter = (u8)temp_32;
2935 
2936 	temp_64 = be64_to_cpu(rsp.port_rcv_errors);
2937 	if (temp_64 > 0xFFFFUL)
2938 		p->port_rcv_errors = cpu_to_be16(0xFFFF);
2939 	else
2940 		p->port_rcv_errors = cpu_to_be16((u16)temp_64);
2941 
2942 	temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
2943 	if (temp_64 > 0xFFFFUL)
2944 		p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
2945 	else
2946 		p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
2947 
2948 	temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
2949 	p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
2950 
2951 	temp_64 = be64_to_cpu(rsp.port_xmit_discards);
2952 	if (temp_64 > 0xFFFFUL)
2953 		p->port_xmit_discards = cpu_to_be16(0xFFFF);
2954 	else
2955 		p->port_xmit_discards = cpu_to_be16((u16)temp_64);
2956 
2957 	temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
2958 	if (temp_64 > 0xFFUL)
2959 		p->port_xmit_constraint_errors = 0xFF;
2960 	else
2961 		p->port_xmit_constraint_errors = (u8)temp_64;
2962 
2963 	temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
2964 	if (temp_64 > 0xFFUL)
2965 		p->port_rcv_constraint_errors = 0xFF;
2966 	else
2967 		p->port_rcv_constraint_errors = (u8)temp_64;
2968 
2969 	/* LocalLink: 7:4, BufferOverrun: 3:0 */
2970 	temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
2971 	if (temp_64 > 0xFUL)
2972 		temp_64 = 0xFUL;
2973 
2974 	temp_link_overrun_errors = temp_64 << 4;
2975 
2976 	temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
2977 	if (temp_64 > 0xFUL)
2978 		temp_64 = 0xFUL;
2979 	temp_link_overrun_errors |= temp_64;
2980 
2981 	p->link_overrun_errors = (u8)temp_link_overrun_errors;
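	/*
	 * Editor's note: e.g. 20 local link integrity errors and 3 excessive
	 * buffer overruns pack to 0xF3 (the LLI count saturates at 0xF in
	 * bits 7:4, the overrun count goes in bits 3:0).
	 */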
2982 
2983 	p->vl15_dropped = 0; /* N/A for OPA */
2984 
2985 bail:
2986 	return reply((struct ib_mad_hdr *)pmp);
2987 }
2988 
2989 static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
2990 				 struct ib_device *ibdev,
2991 				 u8 port, u32 *resp_len)
2992 {
2993 	size_t response_data_size;
2994 	struct _port_ei *rsp;
2995 	struct opa_port_error_info_msg *req;
2996 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2997 	u64 port_mask;
2998 	u32 num_ports;
2999 	u8 port_num;
3000 	u8 num_pslm;
3001 	u64 reg;
3002 
3003 	req = (struct opa_port_error_info_msg *)pmp->data;
3004 	rsp = &req->port[0];
3005 
3006 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3007 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3008 
3009 	memset(rsp, 0, sizeof(*rsp));
3010 
3011 	if (num_ports != 1 || num_ports != num_pslm) {
3012 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3013 		return reply((struct ib_mad_hdr *)pmp);
3014 	}
3015 
3016 	/* Sanity check */
3017 	response_data_size = sizeof(struct opa_port_error_info_msg);
3018 
3019 	if (response_data_size > sizeof(pmp->data)) {
3020 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3021 		return reply((struct ib_mad_hdr *)pmp);
3022 	}
3023 
3024 	/*
3025 	 * The bit set in the mask needs to be consistent with the port
3026 	 * the request came in on.
3027 	 */
3028 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3029 	port_num = find_first_bit((unsigned long *)&port_mask,
3030 				  sizeof(port_mask) * 8);
3031 
3032 	if (port_num != port) {
3033 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3034 		return reply((struct ib_mad_hdr *)pmp);
3035 	}
3036 
3037 	/* PortRcvErrorInfo */
3038 	rsp->port_rcv_ei.status_and_code =
3039 		dd->err_info_rcvport.status_and_code;
3040 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
3041 	       &dd->err_info_rcvport.packet_flit1, sizeof(u64));
3042 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
3043 	       &dd->err_info_rcvport.packet_flit2, sizeof(u64));
3044 
3045 	/* ExcessiveBufferOverrunInfo */
3046 	reg = read_csr(dd, RCV_ERR_INFO);
3047 	if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
3048 		/*
3049 		 * if the RcvExcessBufferOverrun bit is set, save SC of
3050 		 * first pkt that encountered an excess buffer overrun
3051 		 */
3052 		u8 tmp = (u8)reg;
3053 
3054 		tmp &=  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
3055 		tmp <<= 2;
3056 		rsp->excessive_buffer_overrun_ei.status_and_sc = tmp;
3057 		/* set the status bit */
3058 		rsp->excessive_buffer_overrun_ei.status_and_sc |= 0x80;
3059 	}
3060 
3061 	rsp->port_xmit_constraint_ei.status =
3062 		dd->err_info_xmit_constraint.status;
3063 	rsp->port_xmit_constraint_ei.pkey =
3064 		cpu_to_be16(dd->err_info_xmit_constraint.pkey);
3065 	rsp->port_xmit_constraint_ei.slid =
3066 		cpu_to_be32(dd->err_info_xmit_constraint.slid);
3067 
3068 	rsp->port_rcv_constraint_ei.status =
3069 		dd->err_info_rcv_constraint.status;
3070 	rsp->port_rcv_constraint_ei.pkey =
3071 		cpu_to_be16(dd->err_info_rcv_constraint.pkey);
3072 	rsp->port_rcv_constraint_ei.slid =
3073 		cpu_to_be32(dd->err_info_rcv_constraint.slid);
3074 
3075 	/* UncorrectableErrorInfo */
3076 	rsp->uncorrectable_ei.status_and_code = dd->err_info_uncorrectable;
3077 
3078 	/* FMConfigErrorInfo */
3079 	rsp->fm_config_ei.status_and_code = dd->err_info_fmconfig;
3080 
3081 	if (resp_len)
3082 		*resp_len += response_data_size;
3083 
3084 	return reply((struct ib_mad_hdr *)pmp);
3085 }
3086 
3087 static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
3088 				  struct ib_device *ibdev,
3089 				  u8 port, u32 *resp_len)
3090 {
3091 	struct opa_clear_port_status *req =
3092 		(struct opa_clear_port_status *)pmp->data;
3093 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3094 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3095 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3096 	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
3097 	u64 portn = be64_to_cpu(req->port_select_mask[3]);
3098 	u32 counter_select = be32_to_cpu(req->counter_select_mask);
3099 	u32 vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
3100 	unsigned long vl;
3101 
3102 	if ((nports != 1) || (portn != 1 << port)) {
3103 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3104 		return reply((struct ib_mad_hdr *)pmp);
3105 	}
3106 	/*
3107 	 * only counters returned by pma_get_opa_portstatus() are
3108 	 * handled, so when pma_get_opa_portstatus() gets a fix,
3109 	 * the corresponding change should be made here as well.
3110 	 */
3111 
3112 	if (counter_select & CS_PORT_XMIT_DATA)
3113 		write_dev_cntr(dd, C_DC_XMIT_FLITS, CNTR_INVALID_VL, 0);
3114 
3115 	if (counter_select & CS_PORT_RCV_DATA)
3116 		write_dev_cntr(dd, C_DC_RCV_FLITS, CNTR_INVALID_VL, 0);
3117 
3118 	if (counter_select & CS_PORT_XMIT_PKTS)
3119 		write_dev_cntr(dd, C_DC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3120 
3121 	if (counter_select & CS_PORT_RCV_PKTS)
3122 		write_dev_cntr(dd, C_DC_RCV_PKTS, CNTR_INVALID_VL, 0);
3123 
3124 	if (counter_select & CS_PORT_MCAST_XMIT_PKTS)
3125 		write_dev_cntr(dd, C_DC_MC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3126 
3127 	if (counter_select & CS_PORT_MCAST_RCV_PKTS)
3128 		write_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL, 0);
3129 
3130 	if (counter_select & CS_PORT_XMIT_WAIT)
3131 		write_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL, 0);
3132 
3133 	/* ignore cs_sw_portCongestion for HFIs */
3134 
3135 	if (counter_select & CS_PORT_RCV_FECN)
3136 		write_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL, 0);
3137 
3138 	if (counter_select & CS_PORT_RCV_BECN)
3139 		write_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL, 0);
3140 
3141 	/* ignore cs_port_xmit_time_cong for HFIs */
3142 	/* ignore cs_port_xmit_wasted_bw for now */
3143 	/* ignore cs_port_xmit_wait_data for now */
3144 	if (counter_select & CS_PORT_RCV_BUBBLE)
3145 		write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
3146 
3147 	/* Only applicable for switch */
3148 	/* if (counter_select & CS_PORT_MARK_FECN)
3149 	 *	write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
3150 	 */
3151 
3152 	if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
3153 		write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
3154 
3155 	/* ignore cs_port_rcv_switch_relay_errors for HFIs */
3156 	if (counter_select & CS_PORT_XMIT_DISCARDS)
3157 		write_port_cntr(ppd, C_SW_XMIT_DSCD, CNTR_INVALID_VL, 0);
3158 
3159 	if (counter_select & CS_PORT_XMIT_CONSTRAINT_ERRORS)
3160 		write_port_cntr(ppd, C_SW_XMIT_CSTR_ERR, CNTR_INVALID_VL, 0);
3161 
3162 	if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS)
3163 		write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0);
3164 
3165 	if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS) {
3166 		write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0);
3167 		write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3168 	}
3169 
3170 	if (counter_select & CS_LINK_ERROR_RECOVERY) {
3171 		write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3172 		write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3173 			       CNTR_INVALID_VL, 0);
3174 	}
3175 
3176 	if (counter_select & CS_PORT_RCV_ERRORS)
3177 		write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3178 
3179 	if (counter_select & CS_EXCESSIVE_BUFFER_OVERRUNS) {
3180 		write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3181 		dd->rcv_ovfl_cnt = 0;
3182 	}
3183 
3184 	if (counter_select & CS_FM_CONFIG_ERRORS)
3185 		write_dev_cntr(dd, C_DC_FM_CFG_ERR, CNTR_INVALID_VL, 0);
3186 
3187 	if (counter_select & CS_LINK_DOWNED)
3188 		write_port_cntr(ppd, C_SW_LINK_DOWN, CNTR_INVALID_VL, 0);
3189 
3190 	if (counter_select & CS_UNCORRECTABLE_ERRORS)
3191 		write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
3192 
3193 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
3194 			 8 * sizeof(vl_select_mask)) {
3195 		if (counter_select & CS_PORT_XMIT_DATA)
3196 			write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
3197 
3198 		if (counter_select & CS_PORT_RCV_DATA)
3199 			write_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl), 0);
3200 
3201 		if (counter_select & CS_PORT_XMIT_PKTS)
3202 			write_port_cntr(ppd, C_TX_PKT_VL, idx_from_vl(vl), 0);
3203 
3204 		if (counter_select & CS_PORT_RCV_PKTS)
3205 			write_dev_cntr(dd, C_DC_RX_PKT_VL, idx_from_vl(vl), 0);
3206 
3207 		if (counter_select & CS_PORT_XMIT_WAIT)
3208 			write_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl), 0);
3209 
3210 		/* sw_port_vl_congestion is 0 for HFIs */
3211 		if (counter_select & CS_PORT_RCV_FECN)
3212 			write_dev_cntr(dd, C_DC_RCV_FCN_VL, idx_from_vl(vl), 0);
3213 
3214 		if (counter_select & CS_PORT_RCV_BECN)
3215 			write_dev_cntr(dd, C_DC_RCV_BCN_VL, idx_from_vl(vl), 0);
3216 
3217 		/* port_vl_xmit_time_cong is 0 for HFIs */
3218 		/* port_vl_xmit_wasted_bw ??? */
3219 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ??? */
3220 		if (counter_select & CS_PORT_RCV_BUBBLE)
3221 			write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
3222 
3223 		/* if (counter_select & CS_PORT_MARK_FECN)
3224 		 *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
3225 		 */
3226 		/* port_vl_xmit_discards ??? */
3227 	}
3228 
3229 	if (resp_len)
3230 		*resp_len += sizeof(*req);
3231 
3232 	return reply((struct ib_mad_hdr *)pmp);
3233 }
3234 
3235 static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
3236 				 struct ib_device *ibdev,
3237 				 u8 port, u32 *resp_len)
3238 {
3239 	struct _port_ei *rsp;
3240 	struct opa_port_error_info_msg *req;
3241 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3242 	u64 port_mask;
3243 	u32 num_ports;
3244 	u8 port_num;
3245 	u8 num_pslm;
3246 	u32 error_info_select;
3247 
3248 	req = (struct opa_port_error_info_msg *)pmp->data;
3249 	rsp = &req->port[0];
3250 
3251 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3252 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3253 
3254 	memset(rsp, 0, sizeof(*rsp));
3255 
3256 	if (num_ports != 1 || num_ports != num_pslm) {
3257 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3258 		return reply((struct ib_mad_hdr *)pmp);
3259 	}
3260 
3261 	/*
3262 	 * The bit set in the mask needs to be consistent with the port
3263 	 * the request came in on.
3264 	 */
3265 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3266 				  sizeof(port_mask) * 8);
3267 				  sizeof(port_mask));
3268 
3269 	if (port_num != port) {
3270 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3271 		return reply((struct ib_mad_hdr *)pmp);
3272 	}
3273 
3274 	error_info_select = be32_to_cpu(req->error_info_select_mask);
3275 
3276 	/* PortRcvErrorInfo */
3277 	if (error_info_select & ES_PORT_RCV_ERROR_INFO)
3278 		/* turn off status bit */
3279 		dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3280 
3281 	/* ExcessiveBufferOverrunInfo */
3282 	if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
3283 		/*
3284 		 * status bit is essentially kept in the h/w - bit 5 of
3285 		 * RCV_ERR_INFO
3286 		 */
3287 		write_csr(dd, RCV_ERR_INFO,
3288 			  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
3289 
3290 	if (error_info_select & ES_PORT_XMIT_CONSTRAINT_ERROR_INFO)
3291 		dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3292 
3293 	if (error_info_select & ES_PORT_RCV_CONSTRAINT_ERROR_INFO)
3294 		dd->err_info_rcv_constraint.status &= ~OPA_EI_STATUS_SMASK;
3295 
3296 	/* UncorrectableErrorInfo */
3297 	if (error_info_select & ES_UNCORRECTABLE_ERROR_INFO)
3298 		/* turn off status bit */
3299 		dd->err_info_uncorrectable &= ~OPA_EI_STATUS_SMASK;
3300 
3301 	/* FMConfigErrorInfo */
3302 	if (error_info_select & ES_FM_CONFIG_ERROR_INFO)
3303 		/* turn off status bit */
3304 		dd->err_info_fmconfig &= ~OPA_EI_STATUS_SMASK;
3305 
3306 	if (resp_len)
3307 		*resp_len += sizeof(*req);
3308 
3309 	return reply((struct ib_mad_hdr *)pmp);
3310 }
3311 
3312 struct opa_congestion_info_attr {
3313 	__be16 congestion_info;
3314 	u8 control_table_cap;	/* Multiple of 64 entry unit CCTs */
3315 	u8 congestion_log_length;
3316 } __packed;
3317 
3318 static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
3319 				    struct ib_device *ibdev, u8 port,
3320 				    u32 *resp_len)
3321 {
3322 	struct opa_congestion_info_attr *p =
3323 		(struct opa_congestion_info_attr *)data;
3324 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3325 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3326 
3327 	p->congestion_info = 0;
3328 	p->control_table_cap = ppd->cc_max_table_entries;
3329 	p->congestion_log_length = OPA_CONG_LOG_ELEMS;
3330 
3331 	if (resp_len)
3332 		*resp_len += sizeof(*p);
3333 
3334 	return reply((struct ib_mad_hdr *)smp);
3335 }
3336 
3337 static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
3338 				       u8 *data, struct ib_device *ibdev,
3339 				       u8 port, u32 *resp_len)
3340 {
3341 	int i;
3342 	struct opa_congestion_setting_attr *p =
3343 		(struct opa_congestion_setting_attr *)data;
3344 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3345 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3346 	struct opa_congestion_setting_entry_shadow *entries;
3347 	struct cc_state *cc_state;
3348 
3349 	rcu_read_lock();
3350 
3351 	cc_state = get_cc_state(ppd);
3352 
3353 	if (!cc_state) {
3354 		rcu_read_unlock();
3355 		return reply((struct ib_mad_hdr *)smp);
3356 	}
3357 
3358 	entries = cc_state->cong_setting.entries;
3359 	p->port_control = cpu_to_be16(cc_state->cong_setting.port_control);
3360 	p->control_map = cpu_to_be32(cc_state->cong_setting.control_map);
3361 	for (i = 0; i < OPA_MAX_SLS; i++) {
3362 		p->entries[i].ccti_increase = entries[i].ccti_increase;
3363 		p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
3364 		p->entries[i].trigger_threshold =
3365 			entries[i].trigger_threshold;
3366 		p->entries[i].ccti_min = entries[i].ccti_min;
3367 	}
3368 
3369 	rcu_read_unlock();
3370 
3371 	if (resp_len)
3372 		*resp_len += sizeof(*p);
3373 
3374 	return reply((struct ib_mad_hdr *)smp);
3375 }
3376 
3377 /*
3378  * Apply congestion control information stored in the ppd to the
3379  * active structure.
3380  */
3381 static void apply_cc_state(struct hfi1_pportdata *ppd)
3382 {
3383 	struct cc_state *old_cc_state, *new_cc_state;
3384 
3385 	new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
3386 	if (!new_cc_state)
3387 		return;
3388 
3389 	/*
3390 	 * Hold the lock while updating *and* to prevent ppd information
3391 	 * from changing during the update.
3392 	 */
3393 	spin_lock(&ppd->cc_state_lock);
3394 
3395 	old_cc_state = get_cc_state(ppd);
3396 	if (!old_cc_state) {
3397 		/* never active, or shutting down */
3398 		spin_unlock(&ppd->cc_state_lock);
3399 		kfree(new_cc_state);
3400 		return;
3401 	}
3402 
3403 	*new_cc_state = *old_cc_state;
3404 
3405 	new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
3406 	memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
3407 	       ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
3408 
3409 	new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
3410 	new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
3411 	memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
3412 	       OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
3413 
3414 	rcu_assign_pointer(ppd->cc_state, new_cc_state);
3415 
3416 	spin_unlock(&ppd->cc_state_lock);
3417 
3418 	call_rcu(&old_cc_state->rcu, cc_state_reclaim);
3419 }
3420 
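/*
 * Set of the HFI congestion setting attribute: copy the requested control
 * map and per-SL entries into the ppd shadow (under cc_state_lock), apply
 * them to the active cc_state, then reply with the result of the
 * corresponding Get.
 */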
3421 static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
3422 				       struct ib_device *ibdev, u8 port,
3423 				       u32 *resp_len)
3424 {
3425 	struct opa_congestion_setting_attr *p =
3426 		(struct opa_congestion_setting_attr *)data;
3427 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3428 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3429 	struct opa_congestion_setting_entry_shadow *entries;
3430 	int i;
3431 
3432 	/*
3433 	 * Save details from packet into the ppd.  Hold the cc_state_lock so
3434 	 * our information is consistent with anyone trying to apply the state.
3435 	 */
3436 	spin_lock(&ppd->cc_state_lock);
3437 	ppd->cc_sl_control_map = be32_to_cpu(p->control_map);
3438 
3439 	entries = ppd->congestion_entries;
3440 	for (i = 0; i < OPA_MAX_SLS; i++) {
3441 		entries[i].ccti_increase = p->entries[i].ccti_increase;
3442 		entries[i].ccti_timer = be16_to_cpu(p->entries[i].ccti_timer);
3443 		entries[i].trigger_threshold =
3444 			p->entries[i].trigger_threshold;
3445 		entries[i].ccti_min = p->entries[i].ccti_min;
3446 	}
3447 	spin_unlock(&ppd->cc_state_lock);
3448 
3449 	/* now apply the information */
3450 	apply_cc_state(ppd);
3451 
3452 	return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
3453 					   resp_len);
3454 }
3455 
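/*
 * Get of the HFI congestion log: cc_events is used as a ring buffer; the
 * loop below walks all OPA_CONG_LOG_ELEMS slots starting at cc_mad_idx and
 * wrapping at the end.  Stale entries are skipped and stay zeroed, since
 * the reply payload is cleared by the callers before this handler runs.
 * Reading the log also resets threshold_cong_event_map and
 * threshold_event_counter.
 */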
3456 static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
3457 					u8 *data, struct ib_device *ibdev,
3458 					u8 port, u32 *resp_len)
3459 {
3460 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3461 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3462 	struct opa_hfi1_cong_log *cong_log = (struct opa_hfi1_cong_log *)data;
3463 	s64 ts;
3464 	int i;
3465 
3466 	if (am != 0) {
3467 		smp->status |= IB_SMP_INVALID_FIELD;
3468 		return reply((struct ib_mad_hdr *)smp);
3469 	}
3470 
3471 	spin_lock_irq(&ppd->cc_log_lock);
3472 
3473 	cong_log->log_type = OPA_CC_LOG_TYPE_HFI;
3474 	cong_log->congestion_flags = 0;
3475 	cong_log->threshold_event_counter =
3476 		cpu_to_be16(ppd->threshold_event_counter);
3477 	memcpy(cong_log->threshold_cong_event_map,
3478 	       ppd->threshold_cong_event_map,
3479 	       sizeof(cong_log->threshold_cong_event_map));
3480 	/* keep timestamp in units of 1.024 usec */
3481 	ts = ktime_to_ns(ktime_get()) / 1024;
3482 	cong_log->current_time_stamp = cpu_to_be32(ts);
3483 	for (i = 0; i < OPA_CONG_LOG_ELEMS; i++) {
3484 		struct opa_hfi1_cong_log_event_internal *cce =
3485 			&ppd->cc_events[ppd->cc_mad_idx++];
3486 		if (ppd->cc_mad_idx == OPA_CONG_LOG_ELEMS)
3487 			ppd->cc_mad_idx = 0;
3488 		/*
3489 		 * Entries which are older than twice the time
3490 		 * required to wrap the counter are supposed to
3491 		 * be zeroed (CA10-49 IBTA, release 1.2.1, V1).
3492 		 */
3493 		if ((u64)(ts - cce->timestamp) > (2 * UINT_MAX))
3494 			continue;
3495 		memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
3496 		memcpy(cong_log->events[i].remote_qp_number_cn_entry,
3497 		       &cce->rqpn, 3);
3498 		cong_log->events[i].sl_svc_type_cn_entry =
3499 			((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
3500 		cong_log->events[i].remote_lid_cn_entry =
3501 			cpu_to_be32(cce->rlid);
3502 		cong_log->events[i].timestamp_cn_entry =
3503 			cpu_to_be32(cce->timestamp);
3504 	}
3505 
3506 	/*
3507 	 * Reset threshold_cong_event_map and threshold_event_counter
3508 	 * to 0 when the log is read.
3509 	 */
3510 	memset(ppd->threshold_cong_event_map, 0x0,
3511 	       sizeof(ppd->threshold_cong_event_map));
3512 	ppd->threshold_event_counter = 0;
3513 
3514 	spin_unlock_irq(&ppd->cc_log_lock);
3515 
3516 	if (resp_len)
3517 		*resp_len += sizeof(struct opa_hfi1_cong_log);
3518 
3519 	return reply((struct ib_mad_hdr *)smp);
3520 }
3521 
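/*
 * Get of the congestion control table: return n_blocks blocks of
 * IB_CCT_ENTRIES entries each, starting at start_block, converted to
 * big-endian from the RCU-protected cc_state copy.
 */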
3522 static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3523 				   struct ib_device *ibdev, u8 port,
3524 				   u32 *resp_len)
3525 {
3526 	struct ib_cc_table_attr *cc_table_attr =
3527 		(struct ib_cc_table_attr *)data;
3528 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3529 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3530 	u32 start_block = OPA_AM_START_BLK(am);
3531 	u32 n_blocks = OPA_AM_NBLK(am);
3532 	struct ib_cc_table_entry_shadow *entries;
3533 	int i, j;
3534 	u32 sentry, eentry;
3535 	struct cc_state *cc_state;
3536 
3537 	/* sanity check n_blocks, start_block */
3538 	if (n_blocks == 0 ||
3539 	    start_block + n_blocks > ppd->cc_max_table_entries) {
3540 		smp->status |= IB_SMP_INVALID_FIELD;
3541 		return reply((struct ib_mad_hdr *)smp);
3542 	}
3543 
3544 	rcu_read_lock();
3545 
3546 	cc_state = get_cc_state(ppd);
3547 
3548 	if (!cc_state) {
3549 		rcu_read_unlock();
3550 		return reply((struct ib_mad_hdr *)smp);
3551 	}
3552 
3553 	sentry = start_block * IB_CCT_ENTRIES;
3554 	eentry = sentry + (IB_CCT_ENTRIES * n_blocks);
3555 
3556 	cc_table_attr->ccti_limit = cpu_to_be16(cc_state->cct.ccti_limit);
3557 
3558 	entries = cc_state->cct.entries;
3559 
3560 	/* return n_blocks, though the last block may not be full */
3561 	for (j = 0, i = sentry; i < eentry; j++, i++)
3562 		cc_table_attr->ccti_entries[j].entry =
3563 			cpu_to_be16(entries[i].entry);
3564 
3565 	rcu_read_unlock();
3566 
3567 	if (resp_len)
3568 		*resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3569 
3570 	return reply((struct ib_mad_hdr *)smp);
3571 }
3572 
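/* RCU callback: free the old cc_state once all readers have finished. */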
3573 void cc_state_reclaim(struct rcu_head *rcu)
3574 {
3575 	struct cc_state *cc_state = container_of(rcu, struct cc_state, rcu);
3576 
3577 	kfree(cc_state);
3578 }
3579 
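/*
 * Set of the congestion control table: validate the block range and
 * ccti_limit, copy the new entries into the ppd shadow under
 * cc_state_lock, apply them to the active cc_state, then reply with the
 * result of the corresponding Get.
 */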
3580 static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3581 				   struct ib_device *ibdev, u8 port,
3582 				   u32 *resp_len)
3583 {
3584 	struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
3585 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3586 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3587 	u32 start_block = OPA_AM_START_BLK(am);
3588 	u32 n_blocks = OPA_AM_NBLK(am);
3589 	struct ib_cc_table_entry_shadow *entries;
3590 	int i, j;
3591 	u32 sentry, eentry;
3592 	u16 ccti_limit;
3593 
3594 	/* sanity check n_blocks, start_block */
3595 	if (n_blocks == 0 ||
3596 	    start_block + n_blocks > ppd->cc_max_table_entries) {
3597 		smp->status |= IB_SMP_INVALID_FIELD;
3598 		return reply((struct ib_mad_hdr *)smp);
3599 	}
3600 
3601 	sentry = start_block * IB_CCT_ENTRIES;
3602 	eentry = sentry + ((n_blocks - 1) * IB_CCT_ENTRIES) +
3603 		 (be16_to_cpu(p->ccti_limit)) % IB_CCT_ENTRIES + 1;
3604 
3605 	/* sanity check ccti_limit */
3606 	ccti_limit = be16_to_cpu(p->ccti_limit);
3607 	if (ccti_limit + 1 > eentry) {
3608 		smp->status |= IB_SMP_INVALID_FIELD;
3609 		return reply((struct ib_mad_hdr *)smp);
3610 	}
3611 
3612 	/*
3613 	 * Save details from packet into the ppd.  Hold the cc_state_lock so
3614 	 * our information is consistent with anyone trying to apply the state.
3615 	 */
3616 	spin_lock(&ppd->cc_state_lock);
3617 	ppd->total_cct_entry = ccti_limit + 1;
3618 	entries = ppd->ccti_entries;
3619 	for (j = 0, i = sentry; i < eentry; j++, i++)
3620 		entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry);
3621 	spin_unlock(&ppd->cc_state_lock);
3622 
3623 	/* now apply the information */
3624 	apply_cc_state(ppd);
3625 
3626 	return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len);
3627 }
3628 
3629 struct opa_led_info {
3630 	__be32 rsvd_led_mask;
3631 	__be32 rsvd;
3632 };
3633 
3634 #define OPA_LED_SHIFT	31
3635 #define OPA_LED_MASK	BIT(OPA_LED_SHIFT)
3636 
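/*
 * Get of the LED info attribute: bit 31 of rsvd_led_mask reports whether
 * LED beaconing is currently active; the paired Set handler below starts
 * or stops beaconing via the LED override timer.
 */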
3637 static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3638 				   struct ib_device *ibdev, u8 port,
3639 				   u32 *resp_len)
3640 {
3641 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3642 	struct hfi1_pportdata *ppd = dd->pport;
3643 	struct opa_led_info *p = (struct opa_led_info *)data;
3644 	u32 nport = OPA_AM_NPORT(am);
3645 	u32 is_beaconing_active;
3646 
3647 	if (nport != 1) {
3648 		smp->status |= IB_SMP_INVALID_FIELD;
3649 		return reply((struct ib_mad_hdr *)smp);
3650 	}
3651 
3652 	/*
3653 	 * This pairs with the memory barrier in hfi1_start_led_override to
3654 	 * ensure that we read the correct state of LED beaconing represented
3655 	 * by led_override_timer_active.
3656 	 */
3657 	smp_rmb();
3658 	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
3659 	p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
3660 
3661 	if (resp_len)
3662 		*resp_len += sizeof(struct opa_led_info);
3663 
3664 	return reply((struct ib_mad_hdr *)smp);
3665 }
3666 
3667 static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3668 				   struct ib_device *ibdev, u8 port,
3669 				   u32 *resp_len)
3670 {
3671 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3672 	struct opa_led_info *p = (struct opa_led_info *)data;
3673 	u32 nport = OPA_AM_NPORT(am);
3674 	int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
3675 
3676 	if (nport != 1) {
3677 		smp->status |= IB_SMP_INVALID_FIELD;
3678 		return reply((struct ib_mad_hdr *)smp);
3679 	}
3680 
3681 	if (on)
3682 		hfi1_start_led_override(dd->pport, 2000, 1500);
3683 	else
3684 		shutdown_led_override(dd->pport);
3685 
3686 	return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len);
3687 }
3688 
3689 static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
3690 			    u8 *data, struct ib_device *ibdev, u8 port,
3691 			    u32 *resp_len)
3692 {
3693 	int ret;
3694 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3695 
3696 	switch (attr_id) {
3697 	case IB_SMP_ATTR_NODE_DESC:
3698 		ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
3699 					      resp_len);
3700 		break;
3701 	case IB_SMP_ATTR_NODE_INFO:
3702 		ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
3703 					      resp_len);
3704 		break;
3705 	case IB_SMP_ATTR_PORT_INFO:
3706 		ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
3707 					      resp_len);
3708 		break;
3709 	case IB_SMP_ATTR_PKEY_TABLE:
3710 		ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
3711 					       resp_len);
3712 		break;
3713 	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
3714 		ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
3715 					      resp_len);
3716 		break;
3717 	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
3718 		ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
3719 					      resp_len);
3720 		break;
3721 	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
3722 		ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
3723 					       resp_len);
3724 		break;
3725 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
3726 		ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
3727 						resp_len);
3728 		break;
3729 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
3730 		ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
3731 					 resp_len);
3732 		break;
3733 	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
3734 		ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
3735 					 resp_len);
3736 		break;
3737 	case OPA_ATTRIB_ID_CABLE_INFO:
3738 		ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
3739 						resp_len);
3740 		break;
3741 	case IB_SMP_ATTR_VL_ARB_TABLE:
3742 		ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
3743 					    resp_len);
3744 		break;
3745 	case OPA_ATTRIB_ID_CONGESTION_INFO:
3746 		ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
3747 					       resp_len);
3748 		break;
3749 	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
3750 		ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
3751 						  port, resp_len);
3752 		break;
3753 	case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
3754 		ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
3755 						   port, resp_len);
3756 		break;
3757 	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
3758 		ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
3759 					      resp_len);
3760 		break;
3761 	case IB_SMP_ATTR_LED_INFO:
3762 		ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
3763 					      resp_len);
3764 		break;
3765 	case IB_SMP_ATTR_SM_INFO:
3766 		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
3767 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
3768 		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
3769 			return IB_MAD_RESULT_SUCCESS;
3770 		/* FALLTHROUGH */
3771 	default:
3772 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
3773 		ret = reply((struct ib_mad_hdr *)smp);
3774 		break;
3775 	}
3776 	return ret;
3777 }
3778 
3779 static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
3780 			    u8 *data, struct ib_device *ibdev, u8 port,
3781 			    u32 *resp_len)
3782 {
3783 	int ret;
3784 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3785 
3786 	switch (attr_id) {
3787 	case IB_SMP_ATTR_PORT_INFO:
3788 		ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
3789 					      resp_len);
3790 		break;
3791 	case IB_SMP_ATTR_PKEY_TABLE:
3792 		ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
3793 					       resp_len);
3794 		break;
3795 	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
3796 		ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
3797 					      resp_len);
3798 		break;
3799 	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
3800 		ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
3801 					      resp_len);
3802 		break;
3803 	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
3804 		ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
3805 					       resp_len);
3806 		break;
3807 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
3808 		ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
3809 						resp_len);
3810 		break;
3811 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
3812 		ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
3813 					 resp_len);
3814 		break;
3815 	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
3816 		ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
3817 					 resp_len);
3818 		break;
3819 	case IB_SMP_ATTR_VL_ARB_TABLE:
3820 		ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
3821 					    resp_len);
3822 		break;
3823 	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
3824 		ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
3825 						  port, resp_len);
3826 		break;
3827 	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
3828 		ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
3829 					      resp_len);
3830 		break;
3831 	case IB_SMP_ATTR_LED_INFO:
3832 		ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
3833 					      resp_len);
3834 		break;
3835 	case IB_SMP_ATTR_SM_INFO:
3836 		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
3837 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
3838 		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
3839 			return IB_MAD_RESULT_SUCCESS;
3840 		/* FALLTHROUGH */
3841 	default:
3842 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
3843 		ret = reply((struct ib_mad_hdr *)smp);
3844 		break;
3845 	}
3846 	return ret;
3847 }
3848 
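/* Flag an aggregate segment as failed: set bit 15 of err_reqlength. */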
3849 static inline void set_aggr_error(struct opa_aggregate *ag)
3850 {
3851 	ag->err_reqlength |= cpu_to_be16(0x8000);
3852 }
3853 
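/*
 * Get of the aggregate attribute: the low byte of attr_mod gives the
 * number of segments.  Each segment is an opa_aggregate header followed
 * by a payload whose length, in 8-byte units, is in the low 7 bits of
 * err_reqlength.  Each segment is bounds-checked against the end of the
 * SMP, zeroed, and handed to subn_get_opa_sma(); on failure the segment's
 * error bit is set and processing stops.
 */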
3854 static int subn_get_opa_aggregate(struct opa_smp *smp,
3855 				  struct ib_device *ibdev, u8 port,
3856 				  u32 *resp_len)
3857 {
3858 	int i;
3859 	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
3860 	u8 *next_smp = opa_get_smp_data(smp);
3861 
3862 	if (num_attr < 1 || num_attr > 117) {
3863 		smp->status |= IB_SMP_INVALID_FIELD;
3864 		return reply((struct ib_mad_hdr *)smp);
3865 	}
3866 
3867 	for (i = 0; i < num_attr; i++) {
3868 		struct opa_aggregate *agg;
3869 		size_t agg_data_len;
3870 		size_t agg_size;
3871 		u32 am;
3872 
3873 		agg = (struct opa_aggregate *)next_smp;
3874 		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
3875 		agg_size = sizeof(*agg) + agg_data_len;
3876 		am = be32_to_cpu(agg->attr_mod);
3877 
3878 		*resp_len += agg_size;
3879 
3880 		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
3881 			smp->status |= IB_SMP_INVALID_FIELD;
3882 			return reply((struct ib_mad_hdr *)smp);
3883 		}
3884 
3885 		/* zero the payload for this segment */
3886 		memset(next_smp + sizeof(*agg), 0, agg_data_len);
3887 
3888 		(void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
3889 					ibdev, port, NULL);
3890 		if (smp->status & ~IB_SMP_DIRECTION) {
3891 			set_aggr_error(agg);
3892 			return reply((struct ib_mad_hdr *)smp);
3893 		}
3894 		next_smp += agg_size;
3895 	}
3896 
3897 	return reply((struct ib_mad_hdr *)smp);
3898 }
3899 
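/*
 * Set of the aggregate attribute: same segment layout as
 * subn_get_opa_aggregate() above, but each payload carries the data to
 * apply and is passed to subn_set_opa_sma() without being zeroed first.
 */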
3900 static int subn_set_opa_aggregate(struct opa_smp *smp,
3901 				  struct ib_device *ibdev, u8 port,
3902 				  u32 *resp_len)
3903 {
3904 	int i;
3905 	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
3906 	u8 *next_smp = opa_get_smp_data(smp);
3907 
3908 	if (num_attr < 1 || num_attr > 117) {
3909 		smp->status |= IB_SMP_INVALID_FIELD;
3910 		return reply((struct ib_mad_hdr *)smp);
3911 	}
3912 
3913 	for (i = 0; i < num_attr; i++) {
3914 		struct opa_aggregate *agg;
3915 		size_t agg_data_len;
3916 		size_t agg_size;
3917 		u32 am;
3918 
3919 		agg = (struct opa_aggregate *)next_smp;
3920 		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
3921 		agg_size = sizeof(*agg) + agg_data_len;
3922 		am = be32_to_cpu(agg->attr_mod);
3923 
3924 		*resp_len += agg_size;
3925 
3926 		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
3927 			smp->status |= IB_SMP_INVALID_FIELD;
3928 			return reply((struct ib_mad_hdr *)smp);
3929 		}
3930 
3931 		(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
3932 					ibdev, port, NULL);
3933 		if (smp->status & ~IB_SMP_DIRECTION) {
3934 			set_aggr_error(agg);
3935 			return reply((struct ib_mad_hdr *)smp);
3936 		}
3937 		next_smp += agg_size;
3938 	}
3939 
3940 	return reply((struct ib_mad_hdr *)smp);
3941 }
3942 
3943 /*
3944  * OPAv1 specifies that, on the transition to link up, these counters
3945  * are cleared:
3946  *   PortRcvErrors [*]
3947  *   LinkErrorRecovery
3948  *   LocalLinkIntegrityErrors
3949  *   ExcessiveBufferOverruns [*]
3950  *
3951  * [*] Error info associated with these counters is retained, but the
3952  * error info status is reset to 0.
3953  */
3954 void clear_linkup_counters(struct hfi1_devdata *dd)
3955 {
3956 	/* PortRcvErrors */
3957 	write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3958 	dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3959 	/* LinkErrorRecovery */
3960 	write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3961 	write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0);
3962 	/* LocalLinkIntegrityErrors */
3963 	write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0);
3964 	write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3965 	/* ExcessiveBufferOverruns */
3966 	write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3967 	dd->rcv_ovfl_cnt = 0;
3968 	dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3969 }
3970 
3971 /*
3972  * is_local_mad() returns 1 if 'mad' was sent from, and is destined to,
3973  * the local node; 0 otherwise.
3974  */
3975 static int is_local_mad(struct hfi1_ibport *ibp, const struct opa_mad *mad,
3976 			const struct ib_wc *in_wc)
3977 {
3978 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3979 	const struct opa_smp *smp = (const struct opa_smp *)mad;
3980 
3981 	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
3982 		return (smp->hop_cnt == 0 &&
3983 			smp->route.dr.dr_slid == OPA_LID_PERMISSIVE &&
3984 			smp->route.dr.dr_dlid == OPA_LID_PERMISSIVE);
3985 	}
3986 
3987 	return (in_wc->slid == ppd->lid);
3988 }
3989 
3990 /*
3991  * opa_local_smp_check() should only be called on MADs for which
3992  * is_local_mad() returns true. It applies the SMP checks that are
3993  * specific to SMPs which are sent from, and destined to this node.
3994  * opa_local_smp_check() returns 0 if the SMP passes its checks, 1
3995  * otherwise.
3996  *
3997  * SMPs which arrive from other nodes are instead checked by
3998  * opa_smp_check().
3999  */
4000 static int opa_local_smp_check(struct hfi1_ibport *ibp,
4001 			       const struct ib_wc *in_wc)
4002 {
4003 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
4004 	u16 slid = in_wc->slid;
4005 	u16 pkey;
4006 
4007 	if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys))
4008 		return 1;
4009 
4010 	pkey = ppd->pkeys[in_wc->pkey_index];
4011 	/*
4012 	 * We need to do the "node-local" checks specified in OPAv1,
4013 	 * rev 0.90, section 9.10.26, which are:
4014 	 *   - pkey is 0x7fff, or 0xffff
4015 	 *   - Source QPN == 0 || Destination QPN == 0
4016 	 *   - the MAD header's management class is either
4017 	 *     IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE or
4018 	 *     IB_MGMT_CLASS_SUBN_LID_ROUTED
4019 	 *   - SLID != 0
4020 	 *
4021 	 * However, we know (and so don't need to check again) that,
4022 	 * for local SMPs, the MAD stack passes MADs with:
4023 	 *   - Source QPN of 0
4024 	 *   - MAD mgmt_class is IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4025 	 *   - SLID is either: OPA_LID_PERMISSIVE (0xFFFFFFFF), or
4026 	 *     our own port's lid
4027 	 *
4028 	 */
4029 	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
4030 		return 0;
4031 	ingress_pkey_table_fail(ppd, pkey, slid);
4032 	return 1;
4033 }
4034 
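/*
 * Top-level handler for OPA subnet management (SM class) MADs: check the
 * class version and M_Key, then dispatch Get/Set requests to the
 * per-attribute handlers or to the aggregate handlers.  *resp_len starts
 * at the SMP header size and is grown by the individual handlers.
 */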
4035 static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
4036 			    u8 port, const struct opa_mad *in_mad,
4037 			    struct opa_mad *out_mad,
4038 			    u32 *resp_len)
4039 {
4040 	struct opa_smp *smp = (struct opa_smp *)out_mad;
4041 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4042 	u8 *data;
4043 	u32 am;
4044 	__be16 attr_id;
4045 	int ret;
4046 
4047 	*out_mad = *in_mad;
4048 	data = opa_get_smp_data(smp);
4049 
4050 	am = be32_to_cpu(smp->attr_mod);
4051 	attr_id = smp->attr_id;
4052 	if (smp->class_version != OPA_SMI_CLASS_VERSION) {
4053 		smp->status |= IB_SMP_UNSUP_VERSION;
4054 		ret = reply((struct ib_mad_hdr *)smp);
4055 		return ret;
4056 	}
4057 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
4058 			 smp->route.dr.dr_slid, smp->route.dr.return_path,
4059 			 smp->hop_cnt);
4060 	if (ret) {
4061 		u32 port_num = be32_to_cpu(smp->attr_mod);
4062 
4063 		/*
4064 		 * If this is a get/set portinfo, we already check the
4065 		 * M_Key if the MAD is for another port and the M_Key
4066 		 * is OK on the receiving port. This check is needed
4067 		 * to increment the error counters when the M_Key
4068 		 * fails to match on *both* ports.
4069 		 */
4070 		if (attr_id == IB_SMP_ATTR_PORT_INFO &&
4071 		    (smp->method == IB_MGMT_METHOD_GET ||
4072 		     smp->method == IB_MGMT_METHOD_SET) &&
4073 		    port_num && port_num <= ibdev->phys_port_cnt &&
4074 		    port != port_num)
4075 			(void)check_mkey(to_iport(ibdev, port_num),
4076 					  (struct ib_mad_hdr *)smp, 0,
4077 					  smp->mkey, smp->route.dr.dr_slid,
4078 					  smp->route.dr.return_path,
4079 					  smp->hop_cnt);
4080 		ret = IB_MAD_RESULT_FAILURE;
4081 		return ret;
4082 	}
4083 
4084 	*resp_len = opa_get_smp_header_size(smp);
4085 
4086 	switch (smp->method) {
4087 	case IB_MGMT_METHOD_GET:
4088 		switch (attr_id) {
4089 		default:
4090 			clear_opa_smp_data(smp);
4091 			ret = subn_get_opa_sma(attr_id, smp, am, data,
4092 					       ibdev, port, resp_len);
4093 			break;
4094 		case OPA_ATTRIB_ID_AGGREGATE:
4095 			ret = subn_get_opa_aggregate(smp, ibdev, port,
4096 						     resp_len);
4097 			break;
4098 		}
4099 		break;
4100 	case IB_MGMT_METHOD_SET:
4101 		switch (attr_id) {
4102 		default:
4103 			ret = subn_set_opa_sma(attr_id, smp, am, data,
4104 					       ibdev, port, resp_len);
4105 			break;
4106 		case OPA_ATTRIB_ID_AGGREGATE:
4107 			ret = subn_set_opa_aggregate(smp, ibdev, port,
4108 						     resp_len);
4109 			break;
4110 		}
4111 		break;
4112 	case IB_MGMT_METHOD_TRAP:
4113 	case IB_MGMT_METHOD_REPORT:
4114 	case IB_MGMT_METHOD_REPORT_RESP:
4115 	case IB_MGMT_METHOD_GET_RESP:
4116 		/*
4117 		 * The ib_mad module will call us to process responses
4118 		 * before checking for other consumers.
4119 		 * Just tell the caller to process it normally.
4120 		 */
4121 		ret = IB_MAD_RESULT_SUCCESS;
4122 		break;
4123 	default:
4124 		smp->status |= IB_SMP_UNSUP_METHOD;
4125 		ret = reply((struct ib_mad_hdr *)smp);
4126 		break;
4127 	}
4128 
4129 	return ret;
4130 }
4131 
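/*
 * Handler for IB (non-OPA) subnet management MADs: after the M_Key check,
 * only Get(NodeInfo) is handled; other Get attributes are rejected as
 * unsupported.
 */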
4132 static int process_subn(struct ib_device *ibdev, int mad_flags,
4133 			u8 port, const struct ib_mad *in_mad,
4134 			struct ib_mad *out_mad)
4135 {
4136 	struct ib_smp *smp = (struct ib_smp *)out_mad;
4137 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4138 	int ret;
4139 
4140 	*out_mad = *in_mad;
4141 	if (smp->class_version != 1) {
4142 		smp->status |= IB_SMP_UNSUP_VERSION;
4143 		ret = reply((struct ib_mad_hdr *)smp);
4144 		return ret;
4145 	}
4146 
4147 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
4148 			 smp->mkey, (__force __be32)smp->dr_slid,
4149 			 smp->return_path, smp->hop_cnt);
4150 	if (ret) {
4151 		u32 port_num = be32_to_cpu(smp->attr_mod);
4152 
4153 		/*
4154 		 * If this is a get/set portinfo, we already check the
4155 		 * M_Key if the MAD is for another port and the M_Key
4156 		 * is OK on the receiving port. This check is needed
4157 		 * to increment the error counters when the M_Key
4158 		 * fails to match on *both* ports.
4159 		 */
4160 		if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
4161 		    (smp->method == IB_MGMT_METHOD_GET ||
4162 		     smp->method == IB_MGMT_METHOD_SET) &&
4163 		    port_num && port_num <= ibdev->phys_port_cnt &&
4164 		    port != port_num)
4165 			(void)check_mkey(to_iport(ibdev, port_num),
4166 					 (struct ib_mad_hdr *)smp, 0,
4167 					 smp->mkey,
4168 					 (__force __be32)smp->dr_slid,
4169 					 smp->return_path, smp->hop_cnt);
4170 		ret = IB_MAD_RESULT_FAILURE;
4171 		return ret;
4172 	}
4173 
4174 	switch (smp->method) {
4175 	case IB_MGMT_METHOD_GET:
4176 		switch (smp->attr_id) {
4177 		case IB_SMP_ATTR_NODE_INFO:
4178 			ret = subn_get_nodeinfo(smp, ibdev, port);
4179 			break;
4180 		default:
4181 			smp->status |= IB_SMP_UNSUP_METH_ATTR;
4182 			ret = reply((struct ib_mad_hdr *)smp);
4183 			break;
4184 		}
4185 		break;
4186 	}
4187 
4188 	return ret;
4189 }
4190 
4191 static int process_perf(struct ib_device *ibdev, u8 port,
4192 			const struct ib_mad *in_mad,
4193 			struct ib_mad *out_mad)
4194 {
4195 	struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
4196 	struct ib_class_port_info *cpi = (struct ib_class_port_info *)
4197 						&pmp->data;
4198 	int ret = IB_MAD_RESULT_FAILURE;
4199 
4200 	*out_mad = *in_mad;
4201 	if (pmp->mad_hdr.class_version != 1) {
4202 		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4203 		ret = reply((struct ib_mad_hdr *)pmp);
4204 		return ret;
4205 	}
4206 
4207 	switch (pmp->mad_hdr.method) {
4208 	case IB_MGMT_METHOD_GET:
4209 		switch (pmp->mad_hdr.attr_id) {
4210 		case IB_PMA_PORT_COUNTERS:
4211 			ret = pma_get_ib_portcounters(pmp, ibdev, port);
4212 			break;
4213 		case IB_PMA_PORT_COUNTERS_EXT:
4214 			ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
4215 			break;
4216 		case IB_PMA_CLASS_PORT_INFO:
4217 			cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
4218 			ret = reply((struct ib_mad_hdr *)pmp);
4219 			break;
4220 		default:
4221 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4222 			ret = reply((struct ib_mad_hdr *)pmp);
4223 			break;
4224 		}
4225 		break;
4226 
4227 	case IB_MGMT_METHOD_SET:
4228 		if (pmp->mad_hdr.attr_id) {
4229 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4230 			ret = reply((struct ib_mad_hdr *)pmp);
4231 		}
4232 		break;
4233 
4234 	case IB_MGMT_METHOD_TRAP:
4235 	case IB_MGMT_METHOD_GET_RESP:
4236 		/*
4237 		 * The ib_mad module will call us to process responses
4238 		 * before checking for other consumers.
4239 		 * Just tell the caller to process it normally.
4240 		 */
4241 		ret = IB_MAD_RESULT_SUCCESS;
4242 		break;
4243 
4244 	default:
4245 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4246 		ret = reply((struct ib_mad_hdr *)pmp);
4247 		break;
4248 	}
4249 
4250 	return ret;
4251 }
4252 
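/*
 * Top-level handler for OPA performance management (PM class) MADs:
 * dispatch Get/Set requests to the pma_*_opa_* handlers.  *resp_len
 * starts at the MAD header size and is grown by the individual handlers.
 */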
4253 static int process_perf_opa(struct ib_device *ibdev, u8 port,
4254 			    const struct opa_mad *in_mad,
4255 			    struct opa_mad *out_mad, u32 *resp_len)
4256 {
4257 	struct opa_pma_mad *pmp = (struct opa_pma_mad *)out_mad;
4258 	int ret;
4259 
4260 	*out_mad = *in_mad;
4261 
4262 	if (pmp->mad_hdr.class_version != OPA_SMI_CLASS_VERSION) {
4263 		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4264 		return reply((struct ib_mad_hdr *)pmp);
4265 	}
4266 
4267 	*resp_len = sizeof(pmp->mad_hdr);
4268 
4269 	switch (pmp->mad_hdr.method) {
4270 	case IB_MGMT_METHOD_GET:
4271 		switch (pmp->mad_hdr.attr_id) {
4272 		case IB_PMA_CLASS_PORT_INFO:
4273 			ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
4274 			break;
4275 		case OPA_PM_ATTRIB_ID_PORT_STATUS:
4276 			ret = pma_get_opa_portstatus(pmp, ibdev, port,
4277 						     resp_len);
4278 			break;
4279 		case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
4280 			ret = pma_get_opa_datacounters(pmp, ibdev, port,
4281 						       resp_len);
4282 			break;
4283 		case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
4284 			ret = pma_get_opa_porterrors(pmp, ibdev, port,
4285 						     resp_len);
4286 			break;
4287 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
4288 			ret = pma_get_opa_errorinfo(pmp, ibdev, port,
4289 						    resp_len);
4290 			break;
4291 		default:
4292 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4293 			ret = reply((struct ib_mad_hdr *)pmp);
4294 			break;
4295 		}
4296 		break;
4297 
4298 	case IB_MGMT_METHOD_SET:
4299 		switch (pmp->mad_hdr.attr_id) {
4300 		case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
4301 			ret = pma_set_opa_portstatus(pmp, ibdev, port,
4302 						     resp_len);
4303 			break;
4304 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
4305 			ret = pma_set_opa_errorinfo(pmp, ibdev, port,
4306 						    resp_len);
4307 			break;
4308 		default:
4309 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4310 			ret = reply((struct ib_mad_hdr *)pmp);
4311 			break;
4312 		}
4313 		break;
4314 
4315 	case IB_MGMT_METHOD_TRAP:
4316 	case IB_MGMT_METHOD_GET_RESP:
4317 		/*
4318 		 * The ib_mad module will call us to process responses
4319 		 * before checking for other consumers.
4320 		 * Just tell the caller to process it normally.
4321 		 */
4322 		ret = IB_MAD_RESULT_SUCCESS;
4323 		break;
4324 
4325 	default:
4326 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4327 		ret = reply((struct ib_mad_hdr *)pmp);
4328 		break;
4329 	}
4330 
4331 	return ret;
4332 }
4333 
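/*
 * Entry point for OPA-format MADs: replies are marked to use the limited
 * management pkey index (falling back to index 1 if it is not found), and
 * for replies *out_mad_size is rounded up to a multiple of 8 bytes.
 */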
4334 static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
4335 				u8 port, const struct ib_wc *in_wc,
4336 				const struct ib_grh *in_grh,
4337 				const struct opa_mad *in_mad,
4338 				struct opa_mad *out_mad, size_t *out_mad_size,
4339 				u16 *out_mad_pkey_index)
4340 {
4341 	int ret;
4342 	int pkey_idx;
4343 	u32 resp_len = 0;
4344 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4345 
4346 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
4347 	if (pkey_idx < 0) {
4348 		pr_warn("failed to find limited mgmt pkey, defaulting 0x%x\n",
4349 			hfi1_get_pkey(ibp, 1));
4350 		pkey_idx = 1;
4351 	}
4352 	*out_mad_pkey_index = (u16)pkey_idx;
4353 
4354 	switch (in_mad->mad_hdr.mgmt_class) {
4355 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4356 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4357 		if (is_local_mad(ibp, in_mad, in_wc)) {
4358 			ret = opa_local_smp_check(ibp, in_wc);
4359 			if (ret)
4360 				return IB_MAD_RESULT_FAILURE;
4361 		}
4362 		ret = process_subn_opa(ibdev, mad_flags, port, in_mad,
4363 				       out_mad, &resp_len);
4364 		goto bail;
4365 	case IB_MGMT_CLASS_PERF_MGMT:
4366 		ret = process_perf_opa(ibdev, port, in_mad, out_mad,
4367 				       &resp_len);
4368 		goto bail;
4369 
4370 	default:
4371 		ret = IB_MAD_RESULT_SUCCESS;
4372 	}
4373 
4374 bail:
4375 	if (ret & IB_MAD_RESULT_REPLY)
4376 		*out_mad_size = round_up(resp_len, 8);
4377 	else if (ret & IB_MAD_RESULT_SUCCESS)
4378 		*out_mad_size = in_wc->byte_len - sizeof(struct ib_grh);
4379 
4380 	return ret;
4381 }
4382 
4383 static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4384 			       const struct ib_wc *in_wc,
4385 			       const struct ib_grh *in_grh,
4386 			       const struct ib_mad *in_mad,
4387 			       struct ib_mad *out_mad)
4388 {
4389 	int ret;
4390 
4391 	switch (in_mad->mad_hdr.mgmt_class) {
4392 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4393 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4394 		ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
4395 		break;
4396 	case IB_MGMT_CLASS_PERF_MGMT:
4397 		ret = process_perf(ibdev, port, in_mad, out_mad);
4398 		break;
4399 	default:
4400 		ret = IB_MAD_RESULT_SUCCESS;
4401 		break;
4402 	}
4403 
4404 	return ret;
4405 }
4406 
4407 /**
4408  * hfi1_process_mad - process an incoming MAD packet
4409  * @ibdev: the infiniband device this packet came in on
4410  * @mad_flags: MAD flags
4411  * @port: the port number this packet came in on
4412  * @in_wc: the work completion entry for this packet
4413  * @in_grh: the global route header for this packet
4414  * @in_mad: the incoming MAD
4415  * @out_mad: any outgoing MAD reply
4416  *
4417  * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
4418  * interested in processing.
4419  *
4420  * Note that the verbs framework has already done the MAD sanity checks,
4421  * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4422  * MADs.
4423  *
4424  * This is called by the ib_mad module.
4425  */
4426 int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4427 		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
4428 		     const struct ib_mad_hdr *in_mad, size_t in_mad_size,
4429 		     struct ib_mad_hdr *out_mad, size_t *out_mad_size,
4430 		     u16 *out_mad_pkey_index)
4431 {
4432 	switch (in_mad->base_version) {
4433 	case OPA_MGMT_BASE_VERSION:
4434 		if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
4435 			dev_err(ibdev->dma_device, "invalid in_mad_size\n");
4436 			return IB_MAD_RESULT_FAILURE;
4437 		}
4438 		return hfi1_process_opa_mad(ibdev, mad_flags, port,
4439 					    in_wc, in_grh,
4440 					    (struct opa_mad *)in_mad,
4441 					    (struct opa_mad *)out_mad,
4442 					    out_mad_size,
4443 					    out_mad_pkey_index);
4444 	case IB_MGMT_BASE_VERSION:
4445 		return hfi1_process_ib_mad(ibdev, mad_flags, port,
4446 					  in_wc, in_grh,
4447 					  (const struct ib_mad *)in_mad,
4448 					  (struct ib_mad *)out_mad);
4449 	default:
4450 		break;
4451 	}
4452 
4453 	return IB_MAD_RESULT_FAILURE;
4454 }
4455