xref: /openbmc/linux/drivers/infiniband/hw/hfi1/mad.c (revision 0edbfea5)
1 /*
2  * Copyright(c) 2015, 2016 Intel Corporation.
3  *
4  * This file is provided under a dual BSD/GPLv2 license.  When using or
5  * redistributing this file, you may do so under either license.
6  *
7  * GPL LICENSE SUMMARY
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of version 2 of the GNU General Public License as
11  * published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * BSD LICENSE
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  *
24  *  - Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  *  - Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in
28  *    the documentation and/or other materials provided with the
29  *    distribution.
30  *  - Neither the name of Intel Corporation nor the names of its
31  *    contributors may be used to endorse or promote products derived
32  *    from this software without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  *
46  */
47 
48 #include <linux/net.h>
/*
 * How many P_Key table blocks (OPA_PARTITION_TABLE_BLK_SIZE u16 entries
 * each) fit into OPA_SMP_DR_DATA_SIZE.  The sizeof() makes this a size_t.
 */
#define OPA_NUM_PKEY_BLOCKS_PER_SMP \
	(OPA_SMP_DR_DATA_SIZE / (OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16)))
51 
52 #include "hfi.h"
53 #include "mad.h"
54 #include "trace.h"
55 #include "qp.h"
56 
/*
 * Link width "reset" encodings.  The FM is supposed to send 0xffff; the
 * older 0x0fff value is defined as well so that both can be handled.
 */
#define OPA_LINK_WIDTH_RESET		0xffff
#define OPA_LINK_WIDTH_RESET_OLD	0x0fff
60 
61 static int reply(struct ib_mad_hdr *smp)
62 {
63 	/*
64 	 * The verbs framework will handle the directed/LID route
65 	 * packet changes.
66 	 */
67 	smp->method = IB_MGMT_METHOD_GET_RESP;
68 	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
69 		smp->status |= IB_SMP_DIRECTION;
70 	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
71 }
72 
/*
 * Zero the data area of an OPA SMP, using the location and length
 * reported by opa_get_smp_data() / opa_get_smp_data_size().
 */
static inline void clear_opa_smp_data(struct opa_smp *smp)
{
	memset(opa_get_smp_data(smp), 0, opa_get_smp_data_size(smp));
}
80 
81 static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
82 {
83 	struct ib_mad_send_buf *send_buf;
84 	struct ib_mad_agent *agent;
85 	struct opa_smp *smp;
86 	int ret;
87 	unsigned long flags;
88 	unsigned long timeout;
89 	int pkey_idx;
90 	u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;
91 
92 	agent = ibp->rvp.send_agent;
93 	if (!agent)
94 		return;
95 
96 	/* o14-3.2.1 */
97 	if (ppd_from_ibp(ibp)->lstate != IB_PORT_ACTIVE)
98 		return;
99 
100 	/* o14-2 */
101 	if (ibp->rvp.trap_timeout && time_before(jiffies,
102 						 ibp->rvp.trap_timeout))
103 		return;
104 
105 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
106 	if (pkey_idx < 0) {
107 		pr_warn("%s: failed to find limited mgmt pkey, defaulting 0x%x\n",
108 			__func__, hfi1_get_pkey(ibp, 1));
109 		pkey_idx = 1;
110 	}
111 
112 	send_buf = ib_create_send_mad(agent, qpn, pkey_idx, 0,
113 				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
114 				      GFP_ATOMIC, IB_MGMT_BASE_VERSION);
115 	if (IS_ERR(send_buf))
116 		return;
117 
118 	smp = send_buf->mad;
119 	smp->base_version = OPA_MGMT_BASE_VERSION;
120 	smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
121 	smp->class_version = OPA_SMI_CLASS_VERSION;
122 	smp->method = IB_MGMT_METHOD_TRAP;
123 	ibp->rvp.tid++;
124 	smp->tid = cpu_to_be64(ibp->rvp.tid);
125 	smp->attr_id = IB_SMP_ATTR_NOTICE;
126 	/* o14-1: smp->mkey = 0; */
127 	memcpy(smp->route.lid.data, data, len);
128 
129 	spin_lock_irqsave(&ibp->rvp.lock, flags);
130 	if (!ibp->rvp.sm_ah) {
131 		if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
132 			struct ib_ah *ah;
133 
134 			ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
135 			if (IS_ERR(ah)) {
136 				ret = PTR_ERR(ah);
137 			} else {
138 				send_buf->ah = ah;
139 				ibp->rvp.sm_ah = ibah_to_rvtah(ah);
140 				ret = 0;
141 			}
142 		} else {
143 			ret = -EINVAL;
144 		}
145 	} else {
146 		send_buf->ah = &ibp->rvp.sm_ah->ibah;
147 		ret = 0;
148 	}
149 	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
150 
151 	if (!ret)
152 		ret = ib_post_send_mad(send_buf, NULL);
153 	if (!ret) {
154 		/* 4.096 usec. */
155 		timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
156 		ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
157 	} else {
158 		ib_free_send_mad(send_buf);
159 		ibp->rvp.trap_timeout = 0;
160 	}
161 }
162 
163 /*
164  * Send a bad [PQ]_Key trap (ch. 14.3.8).
165  */
166 void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
167 		    u32 qp1, u32 qp2, u16 lid1, u16 lid2)
168 {
169 	struct opa_mad_notice_attr data;
170 	u32 lid = ppd_from_ibp(ibp)->lid;
171 	u32 _lid1 = lid1;
172 	u32 _lid2 = lid2;
173 
174 	memset(&data, 0, sizeof(data));
175 
176 	if (trap_num == OPA_TRAP_BAD_P_KEY)
177 		ibp->rvp.pkey_violations++;
178 	else
179 		ibp->rvp.qkey_violations++;
180 	ibp->rvp.n_pkt_drops++;
181 
182 	/* Send violation trap */
183 	data.generic_type = IB_NOTICE_TYPE_SECURITY;
184 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
185 	data.trap_num = trap_num;
186 	data.issuer_lid = cpu_to_be32(lid);
187 	data.ntc_257_258.lid1 = cpu_to_be32(_lid1);
188 	data.ntc_257_258.lid2 = cpu_to_be32(_lid2);
189 	data.ntc_257_258.key = cpu_to_be32(key);
190 	data.ntc_257_258.sl = sl << 3;
191 	data.ntc_257_258.qp1 = cpu_to_be32(qp1);
192 	data.ntc_257_258.qp2 = cpu_to_be32(qp2);
193 
194 	send_trap(ibp, &data, sizeof(data));
195 }
196 
197 /*
198  * Send a bad M_Key trap (ch. 14.3.9).
199  */
200 static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
201 		     __be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt)
202 {
203 	struct opa_mad_notice_attr data;
204 	u32 lid = ppd_from_ibp(ibp)->lid;
205 
206 	memset(&data, 0, sizeof(data));
207 	/* Send violation trap */
208 	data.generic_type = IB_NOTICE_TYPE_SECURITY;
209 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
210 	data.trap_num = OPA_TRAP_BAD_M_KEY;
211 	data.issuer_lid = cpu_to_be32(lid);
212 	data.ntc_256.lid = data.issuer_lid;
213 	data.ntc_256.method = mad->method;
214 	data.ntc_256.attr_id = mad->attr_id;
215 	data.ntc_256.attr_mod = mad->attr_mod;
216 	data.ntc_256.mkey = mkey;
217 	if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
218 		data.ntc_256.dr_slid = dr_slid;
219 		data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
220 		if (hop_cnt > ARRAY_SIZE(data.ntc_256.dr_rtn_path)) {
221 			data.ntc_256.dr_trunc_hop |=
222 				IB_NOTICE_TRAP_DR_TRUNC;
223 			hop_cnt = ARRAY_SIZE(data.ntc_256.dr_rtn_path);
224 		}
225 		data.ntc_256.dr_trunc_hop |= hop_cnt;
226 		memcpy(data.ntc_256.dr_rtn_path, return_path,
227 		       hop_cnt);
228 	}
229 
230 	send_trap(ibp, &data, sizeof(data));
231 }
232 
233 /*
234  * Send a Port Capability Mask Changed trap (ch. 14.3.11).
235  */
236 void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
237 {
238 	struct opa_mad_notice_attr data;
239 	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
240 	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
241 	struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
242 	u32 lid = ppd_from_ibp(ibp)->lid;
243 
244 	memset(&data, 0, sizeof(data));
245 
246 	data.generic_type = IB_NOTICE_TYPE_INFO;
247 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
248 	data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
249 	data.issuer_lid = cpu_to_be32(lid);
250 	data.ntc_144.lid = data.issuer_lid;
251 	data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
252 
253 	send_trap(ibp, &data, sizeof(data));
254 }
255 
256 /*
257  * Send a System Image GUID Changed trap (ch. 14.3.12).
258  */
259 void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
260 {
261 	struct opa_mad_notice_attr data;
262 	u32 lid = ppd_from_ibp(ibp)->lid;
263 
264 	memset(&data, 0, sizeof(data));
265 
266 	data.generic_type = IB_NOTICE_TYPE_INFO;
267 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
268 	data.trap_num = OPA_TRAP_CHANGE_SYSGUID;
269 	data.issuer_lid = cpu_to_be32(lid);
270 	data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
271 	data.ntc_145.lid = data.issuer_lid;
272 
273 	send_trap(ibp, &data, sizeof(data));
274 }
275 
276 /*
277  * Send a Node Description Changed trap (ch. 14.3.13).
278  */
279 void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
280 {
281 	struct opa_mad_notice_attr data;
282 	u32 lid = ppd_from_ibp(ibp)->lid;
283 
284 	memset(&data, 0, sizeof(data));
285 
286 	data.generic_type = IB_NOTICE_TYPE_INFO;
287 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
288 	data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
289 	data.issuer_lid = cpu_to_be32(lid);
290 	data.ntc_144.lid = data.issuer_lid;
291 	data.ntc_144.change_flags =
292 		cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG);
293 
294 	send_trap(ibp, &data, sizeof(data));
295 }
296 
297 static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
298 				   u8 *data, struct ib_device *ibdev,
299 				   u8 port, u32 *resp_len)
300 {
301 	struct opa_node_description *nd;
302 
303 	if (am) {
304 		smp->status |= IB_SMP_INVALID_FIELD;
305 		return reply((struct ib_mad_hdr *)smp);
306 	}
307 
308 	nd = (struct opa_node_description *)data;
309 
310 	memcpy(nd->data, ibdev->node_desc, sizeof(nd->data));
311 
312 	if (resp_len)
313 		*resp_len += sizeof(*nd);
314 
315 	return reply((struct ib_mad_hdr *)smp);
316 }
317 
318 static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
319 				   struct ib_device *ibdev, u8 port,
320 				   u32 *resp_len)
321 {
322 	struct opa_node_info *ni;
323 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
324 	unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */
325 
326 	ni = (struct opa_node_info *)data;
327 
328 	/* GUID 0 is illegal */
329 	if (am || pidx >= dd->num_pports || dd->pport[pidx].guid == 0) {
330 		smp->status |= IB_SMP_INVALID_FIELD;
331 		return reply((struct ib_mad_hdr *)smp);
332 	}
333 
334 	ni->port_guid = cpu_to_be64(dd->pport[pidx].guid);
335 	ni->base_version = OPA_MGMT_BASE_VERSION;
336 	ni->class_version = OPA_SMI_CLASS_VERSION;
337 	ni->node_type = 1;     /* channel adapter */
338 	ni->num_ports = ibdev->phys_port_cnt;
339 	/* This is already in network order */
340 	ni->system_image_guid = ib_hfi1_sys_image_guid;
341 	/* Use first-port GUID as node */
342 	ni->node_guid = cpu_to_be64(dd->pport->guid);
343 	ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
344 	ni->device_id = cpu_to_be16(dd->pcidev->device);
345 	ni->revision = cpu_to_be32(dd->minrev);
346 	ni->local_port_num = port;
347 	ni->vendor_id[0] = dd->oui1;
348 	ni->vendor_id[1] = dd->oui2;
349 	ni->vendor_id[2] = dd->oui3;
350 
351 	if (resp_len)
352 		*resp_len += sizeof(*ni);
353 
354 	return reply((struct ib_mad_hdr *)smp);
355 }
356 
357 static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
358 			     u8 port)
359 {
360 	struct ib_node_info *nip = (struct ib_node_info *)&smp->data;
361 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
362 	unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */
363 
364 	/* GUID 0 is illegal */
365 	if (smp->attr_mod || pidx >= dd->num_pports ||
366 	    dd->pport[pidx].guid == 0)
367 		smp->status |= IB_SMP_INVALID_FIELD;
368 	else
369 		nip->port_guid = cpu_to_be64(dd->pport[pidx].guid);
370 
371 	nip->base_version = OPA_MGMT_BASE_VERSION;
372 	nip->class_version = OPA_SMI_CLASS_VERSION;
373 	nip->node_type = 1;     /* channel adapter */
374 	nip->num_ports = ibdev->phys_port_cnt;
375 	/* This is already in network order */
376 	nip->sys_guid = ib_hfi1_sys_image_guid;
377 	 /* Use first-port GUID as node */
378 	nip->node_guid = cpu_to_be64(dd->pport->guid);
379 	nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
380 	nip->device_id = cpu_to_be16(dd->pcidev->device);
381 	nip->revision = cpu_to_be32(dd->minrev);
382 	nip->local_port_num = port;
383 	nip->vendor_id[0] = dd->oui1;
384 	nip->vendor_id[1] = dd->oui2;
385 	nip->vendor_id[2] = dd->oui3;
386 
387 	return reply((struct ib_mad_hdr *)smp);
388 }
389 
390 static void set_link_width_enabled(struct hfi1_pportdata *ppd, u32 w)
391 {
392 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_ENB, w);
393 }
394 
395 static void set_link_width_downgrade_enabled(struct hfi1_pportdata *ppd, u32 w)
396 {
397 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_DG_ENB, w);
398 }
399 
400 static void set_link_speed_enabled(struct hfi1_pportdata *ppd, u32 s)
401 {
402 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_SPD_ENB, s);
403 }
404 
405 static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
406 		      int mad_flags, __be64 mkey, __be32 dr_slid,
407 		      u8 return_path[], u8 hop_cnt)
408 {
409 	int valid_mkey = 0;
410 	int ret = 0;
411 
412 	/* Is the mkey in the process of expiring? */
413 	if (ibp->rvp.mkey_lease_timeout &&
414 	    time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
415 		/* Clear timeout and mkey protection field. */
416 		ibp->rvp.mkey_lease_timeout = 0;
417 		ibp->rvp.mkeyprot = 0;
418 	}
419 
420 	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
421 	    ibp->rvp.mkey == mkey)
422 		valid_mkey = 1;
423 
424 	/* Unset lease timeout on any valid Get/Set/TrapRepress */
425 	if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
426 	    (mad->method == IB_MGMT_METHOD_GET ||
427 	     mad->method == IB_MGMT_METHOD_SET ||
428 	     mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
429 		ibp->rvp.mkey_lease_timeout = 0;
430 
431 	if (!valid_mkey) {
432 		switch (mad->method) {
433 		case IB_MGMT_METHOD_GET:
434 			/* Bad mkey not a violation below level 2 */
435 			if (ibp->rvp.mkeyprot < 2)
436 				break;
437 		case IB_MGMT_METHOD_SET:
438 		case IB_MGMT_METHOD_TRAP_REPRESS:
439 			if (ibp->rvp.mkey_violations != 0xFFFF)
440 				++ibp->rvp.mkey_violations;
441 			if (!ibp->rvp.mkey_lease_timeout &&
442 			    ibp->rvp.mkey_lease_period)
443 				ibp->rvp.mkey_lease_timeout = jiffies +
444 					ibp->rvp.mkey_lease_period * HZ;
445 			/* Generate a trap notice. */
446 			bad_mkey(ibp, mad, mkey, dr_slid, return_path,
447 				 hop_cnt);
448 			ret = 1;
449 		}
450 	}
451 
452 	return ret;
453 }
454 
455 /*
456  * The SMA caches reads from LCB registers in case the LCB is unavailable.
457  * (The LCB is unavailable in certain link states, for example.)
458  */
459 struct lcb_datum {
460 	u32 off;
461 	u64 val;
462 };
463 
464 static struct lcb_datum lcb_cache[] = {
465 	{ DC_LCB_STS_ROUND_TRIP_LTP_CNT, 0 },
466 };
467 
468 static int write_lcb_cache(u32 off, u64 val)
469 {
470 	int i;
471 
472 	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
473 		if (lcb_cache[i].off == off) {
474 			lcb_cache[i].val = val;
475 			return 0;
476 		}
477 	}
478 
479 	pr_warn("%s bad offset 0x%x\n", __func__, off);
480 	return -1;
481 }
482 
483 static int read_lcb_cache(u32 off, u64 *val)
484 {
485 	int i;
486 
487 	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
488 		if (lcb_cache[i].off == off) {
489 			*val = lcb_cache[i].val;
490 			return 0;
491 		}
492 	}
493 
494 	pr_warn("%s bad offset 0x%x\n", __func__, off);
495 	return -1;
496 }
497 
498 void read_ltp_rtt(struct hfi1_devdata *dd)
499 {
500 	u64 reg;
501 
502 	if (read_lcb_csr(dd, DC_LCB_STS_ROUND_TRIP_LTP_CNT, &reg))
503 		dd_dev_err(dd, "%s: unable to read LTP RTT\n", __func__);
504 	else
505 		write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
506 }
507 
508 static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
509 				   struct ib_device *ibdev, u8 port,
510 				   u32 *resp_len)
511 {
512 	int i;
513 	struct hfi1_devdata *dd;
514 	struct hfi1_pportdata *ppd;
515 	struct hfi1_ibport *ibp;
516 	struct opa_port_info *pi = (struct opa_port_info *)data;
517 	u8 mtu;
518 	u8 credit_rate;
519 	u8 is_beaconing_active;
520 	u32 state;
521 	u32 num_ports = OPA_AM_NPORT(am);
522 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
523 	u32 buffer_units;
524 	u64 tmp = 0;
525 
526 	if (num_ports != 1) {
527 		smp->status |= IB_SMP_INVALID_FIELD;
528 		return reply((struct ib_mad_hdr *)smp);
529 	}
530 
531 	dd = dd_from_ibdev(ibdev);
532 	/* IB numbers ports from 1, hw from 0 */
533 	ppd = dd->pport + (port - 1);
534 	ibp = &ppd->ibport_data;
535 
536 	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
537 	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
538 		smp->status |= IB_SMP_INVALID_FIELD;
539 		return reply((struct ib_mad_hdr *)smp);
540 	}
541 
542 	pi->lid = cpu_to_be32(ppd->lid);
543 
544 	/* Only return the mkey if the protection field allows it. */
545 	if (!(smp->method == IB_MGMT_METHOD_GET &&
546 	      ibp->rvp.mkey != smp->mkey &&
547 	      ibp->rvp.mkeyprot == 1))
548 		pi->mkey = ibp->rvp.mkey;
549 
550 	pi->subnet_prefix = ibp->rvp.gid_prefix;
551 	pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
552 	pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
553 	pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
554 	pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
555 	pi->sa_qp = cpu_to_be32(ppd->sa_qp);
556 
557 	pi->link_width.enabled = cpu_to_be16(ppd->link_width_enabled);
558 	pi->link_width.supported = cpu_to_be16(ppd->link_width_supported);
559 	pi->link_width.active = cpu_to_be16(ppd->link_width_active);
560 
561 	pi->link_width_downgrade.supported =
562 			cpu_to_be16(ppd->link_width_downgrade_supported);
563 	pi->link_width_downgrade.enabled =
564 			cpu_to_be16(ppd->link_width_downgrade_enabled);
565 	pi->link_width_downgrade.tx_active =
566 			cpu_to_be16(ppd->link_width_downgrade_tx_active);
567 	pi->link_width_downgrade.rx_active =
568 			cpu_to_be16(ppd->link_width_downgrade_rx_active);
569 
570 	pi->link_speed.supported = cpu_to_be16(ppd->link_speed_supported);
571 	pi->link_speed.active = cpu_to_be16(ppd->link_speed_active);
572 	pi->link_speed.enabled = cpu_to_be16(ppd->link_speed_enabled);
573 
574 	state = driver_lstate(ppd);
575 
576 	if (start_of_sm_config && (state == IB_PORT_INIT))
577 		ppd->is_sm_config_started = 1;
578 
579 	pi->port_phys_conf = (ppd->port_type & 0xf);
580 
581 #if PI_LED_ENABLE_SUP
582 	pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
583 	pi->port_states.ledenable_offlinereason |=
584 		ppd->is_sm_config_started << 5;
585 	/*
586 	 * This pairs with the memory barrier in hfi1_start_led_override to
587 	 * ensure that we read the correct state of LED beaconing represented
588 	 * by led_override_timer_active
589 	 */
590 	smp_rmb();
591 	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
592 	pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
593 	pi->port_states.ledenable_offlinereason |=
594 		ppd->offline_disabled_reason;
595 #else
596 	pi->port_states.offline_reason = ppd->neighbor_normal << 4;
597 	pi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
598 	pi->port_states.offline_reason |= ppd->offline_disabled_reason;
599 #endif /* PI_LED_ENABLE_SUP */
600 
601 	pi->port_states.portphysstate_portstate =
602 		(hfi1_ibphys_portstate(ppd) << 4) | state;
603 
604 	pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
605 
606 	memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
607 	for (i = 0; i < ppd->vls_supported; i++) {
608 		mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
609 		if ((i % 2) == 0)
610 			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
611 		else
612 			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
613 	}
614 	/* don't forget VL 15 */
615 	mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
616 	pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
617 	pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
618 	pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
619 	pi->partenforce_filterraw |=
620 		(ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
621 	if (ppd->part_enforce & HFI1_PART_ENFORCE_IN)
622 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
623 	if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
624 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
625 	pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
626 	/* P_KeyViolations are counted by hardware. */
627 	pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
628 	pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
629 
630 	pi->vl.cap = ppd->vls_supported;
631 	pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
632 	pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
633 	pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);
634 
635 	pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;
636 
637 	pi->port_link_mode  = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
638 					  OPA_PORT_LINK_MODE_OPA << 5 |
639 					  OPA_PORT_LINK_MODE_OPA);
640 
641 	pi->port_ltp_crc_mode = cpu_to_be16(ppd->port_ltp_crc_mode);
642 
643 	pi->port_mode = cpu_to_be16(
644 				ppd->is_active_optimize_enabled ?
645 					OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);
646 
647 	pi->port_packet_format.supported =
648 		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
649 	pi->port_packet_format.enabled =
650 		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
651 
652 	/* flit_control.interleave is (OPA V1, version .76):
653 	 * bits		use
654 	 * ----		---
655 	 * 2		res
656 	 * 2		DistanceSupported
657 	 * 2		DistanceEnabled
658 	 * 5		MaxNextLevelTxEnabled
659 	 * 5		MaxNestLevelRxSupported
660 	 *
661 	 * HFI supports only "distance mode 1" (see OPA V1, version .76,
662 	 * section 9.6.2), so set DistanceSupported, DistanceEnabled
663 	 * to 0x1.
664 	 */
665 	pi->flit_control.interleave = cpu_to_be16(0x1400);
666 
667 	pi->link_down_reason = ppd->local_link_down_reason.sma;
668 	pi->neigh_link_down_reason = ppd->neigh_link_down_reason.sma;
669 	pi->port_error_action = cpu_to_be32(ppd->port_error_action);
670 	pi->mtucap = mtu_to_enum(hfi1_max_mtu, IB_MTU_4096);
671 
672 	/* 32.768 usec. response time (guessing) */
673 	pi->resptimevalue = 3;
674 
675 	pi->local_port_num = port;
676 
677 	/* buffer info for FM */
678 	pi->overall_buffer_space = cpu_to_be16(dd->link_credits);
679 
680 	pi->neigh_node_guid = cpu_to_be64(ppd->neighbor_guid);
681 	pi->neigh_port_num = ppd->neighbor_port_number;
682 	pi->port_neigh_mode =
683 		(ppd->neighbor_type & OPA_PI_MASK_NEIGH_NODE_TYPE) |
684 		(ppd->mgmt_allowed ? OPA_PI_MASK_NEIGH_MGMT_ALLOWED : 0) |
685 		(ppd->neighbor_fm_security ?
686 			OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS : 0);
687 
688 	/* HFIs shall always return VL15 credits to their
689 	 * neighbor in a timely manner, without any credit return pacing.
690 	 */
691 	credit_rate = 0;
692 	buffer_units  = (dd->vau) & OPA_PI_MASK_BUF_UNIT_BUF_ALLOC;
693 	buffer_units |= (dd->vcu << 3) & OPA_PI_MASK_BUF_UNIT_CREDIT_ACK;
694 	buffer_units |= (credit_rate << 6) &
695 				OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE;
696 	buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
697 	pi->buffer_units = cpu_to_be32(buffer_units);
698 
699 	pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported);
700 
701 	/* HFI supports a replay buffer 128 LTPs in size */
702 	pi->replay_depth.buffer = 0x80;
703 	/* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
704 	read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);
705 
706 	/*
707 	 * this counter is 16 bits wide, but the replay_depth.wire
708 	 * variable is only 8 bits
709 	 */
710 	if (tmp > 0xff)
711 		tmp = 0xff;
712 	pi->replay_depth.wire = tmp;
713 
714 	if (resp_len)
715 		*resp_len += sizeof(struct opa_port_info);
716 
717 	return reply((struct ib_mad_hdr *)smp);
718 }
719 
720 /**
721  * get_pkeys - return the PKEY table
722  * @dd: the hfi1_ib device
723  * @port: the IB port number
724  * @pkeys: the pkey table is placed here
725  */
726 static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
727 {
728 	struct hfi1_pportdata *ppd = dd->pport + port - 1;
729 
730 	memcpy(pkeys, ppd->pkeys, sizeof(ppd->pkeys));
731 
732 	return 0;
733 }
734 
735 static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
736 				    struct ib_device *ibdev, u8 port,
737 				    u32 *resp_len)
738 {
739 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
740 	u32 n_blocks_req = OPA_AM_NBLK(am);
741 	u32 start_block = am & 0x7ff;
742 	__be16 *p;
743 	u16 *q;
744 	int i;
745 	u16 n_blocks_avail;
746 	unsigned npkeys = hfi1_get_npkeys(dd);
747 	size_t size;
748 
749 	if (n_blocks_req == 0) {
750 		pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
751 			port, start_block, n_blocks_req);
752 		smp->status |= IB_SMP_INVALID_FIELD;
753 		return reply((struct ib_mad_hdr *)smp);
754 	}
755 
756 	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
757 
758 	size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
759 
760 	if (start_block + n_blocks_req > n_blocks_avail ||
761 	    n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
762 		pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; "
763 			"avail 0x%x; blk/smp 0x%lx\n",
764 			start_block, n_blocks_req, n_blocks_avail,
765 			OPA_NUM_PKEY_BLOCKS_PER_SMP);
766 		smp->status |= IB_SMP_INVALID_FIELD;
767 		return reply((struct ib_mad_hdr *)smp);
768 	}
769 
770 	p = (__be16 *)data;
771 	q = (u16 *)data;
772 	/* get the real pkeys if we are requesting the first block */
773 	if (start_block == 0) {
774 		get_pkeys(dd, port, q);
775 		for (i = 0; i < npkeys; i++)
776 			p[i] = cpu_to_be16(q[i]);
777 		if (resp_len)
778 			*resp_len += size;
779 	} else {
780 		smp->status |= IB_SMP_INVALID_FIELD;
781 	}
782 	return reply((struct ib_mad_hdr *)smp);
783 }
784 
/* Possible verdicts for a requested port state transition. */
enum {
	HFI_TRANSITION_DISALLOWED = 0,
	HFI_TRANSITION_IGNORED    = 1,
	HFI_TRANSITION_ALLOWED    = 2,
	HFI_TRANSITION_UNDEFINED  = 3,
};

/*
 * Short aliases for the verdicts above, used to keep the
 * {logical,physical}_state_transitions tables readable.
 */
enum {
	__D = HFI_TRANSITION_DISALLOWED,
	__I = HFI_TRANSITION_IGNORED,
	__A = HFI_TRANSITION_ALLOWED,
	__U = HFI_TRANSITION_UNDEFINED,
};
802 
803 /*
804  * IB_PORTPHYSSTATE_POLLING (2) through OPA_PORTPHYSSTATE_MAX (11) are
805  * represented in physical_state_transitions.
806  */
807 #define __N_PHYSTATES (OPA_PORTPHYSSTATE_MAX - IB_PORTPHYSSTATE_POLLING + 1)
808 
809 /*
810  * Within physical_state_transitions, rows represent "old" states,
811  * columns "new" states, and physical_state_transitions.allowed[old][new]
812  * indicates if the transition from old state to new state is legal (see
813  * OPAg1v1, Table 6-4).
814  */
815 static const struct {
816 	u8 allowed[__N_PHYSTATES][__N_PHYSTATES];
817 } physical_state_transitions = {
818 	{
819 		/* 2    3    4    5    6    7    8    9   10   11 */
820 	/* 2 */	{ __A, __A, __D, __D, __D, __D, __D, __D, __D, __D },
821 	/* 3 */	{ __A, __I, __D, __D, __D, __D, __D, __D, __D, __A },
822 	/* 4 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
823 	/* 5 */	{ __A, __A, __D, __I, __D, __D, __D, __D, __D, __D },
824 	/* 6 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
825 	/* 7 */	{ __D, __A, __D, __D, __D, __I, __D, __D, __D, __D },
826 	/* 8 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
827 	/* 9 */	{ __I, __A, __D, __D, __D, __D, __D, __I, __D, __D },
828 	/*10 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
829 	/*11 */	{ __D, __A, __D, __D, __D, __D, __D, __D, __D, __I },
830 	}
831 };
832 
833 /*
834  * IB_PORT_DOWN (1) through IB_PORT_ACTIVE_DEFER (5) are represented
835  * logical_state_transitions
836  */
837 
838 #define __N_LOGICAL_STATES (IB_PORT_ACTIVE_DEFER - IB_PORT_DOWN + 1)
839 
840 /*
841  * Within logical_state_transitions rows represent "old" states,
842  * columns "new" states, and logical_state_transitions.allowed[old][new]
843  * indicates if the transition from old state to new state is legal (see
844  * OPAg1v1, Table 9-12).
845  */
846 static const struct {
847 	u8 allowed[__N_LOGICAL_STATES][__N_LOGICAL_STATES];
848 } logical_state_transitions = {
849 	{
850 		/* 1    2    3    4    5 */
851 	/* 1 */	{ __I, __D, __D, __D, __U},
852 	/* 2 */	{ __D, __I, __A, __D, __U},
853 	/* 3 */	{ __D, __D, __I, __A, __U},
854 	/* 4 */	{ __D, __D, __I, __I, __U},
855 	/* 5 */	{ __U, __U, __U, __U, __U},
856 	}
857 };
858 
859 static int logical_transition_allowed(int old, int new)
860 {
861 	if (old < IB_PORT_NOP || old > IB_PORT_ACTIVE_DEFER ||
862 	    new < IB_PORT_NOP || new > IB_PORT_ACTIVE_DEFER) {
863 		pr_warn("invalid logical state(s) (old %d new %d)\n",
864 			old, new);
865 		return HFI_TRANSITION_UNDEFINED;
866 	}
867 
868 	if (new == IB_PORT_NOP)
869 		return HFI_TRANSITION_ALLOWED; /* always allowed */
870 
871 	/* adjust states for indexing into logical_state_transitions */
872 	old -= IB_PORT_DOWN;
873 	new -= IB_PORT_DOWN;
874 
875 	if (old < 0 || new < 0)
876 		return HFI_TRANSITION_UNDEFINED;
877 	return logical_state_transitions.allowed[old][new];
878 }
879 
880 static int physical_transition_allowed(int old, int new)
881 {
882 	if (old < IB_PORTPHYSSTATE_NOP || old > OPA_PORTPHYSSTATE_MAX ||
883 	    new < IB_PORTPHYSSTATE_NOP || new > OPA_PORTPHYSSTATE_MAX) {
884 		pr_warn("invalid physical state(s) (old %d new %d)\n",
885 			old, new);
886 		return HFI_TRANSITION_UNDEFINED;
887 	}
888 
889 	if (new == IB_PORTPHYSSTATE_NOP)
890 		return HFI_TRANSITION_ALLOWED; /* always allowed */
891 
892 	/* adjust states for indexing into physical_state_transitions */
893 	old -= IB_PORTPHYSSTATE_POLLING;
894 	new -= IB_PORTPHYSSTATE_POLLING;
895 
896 	if (old < 0 || new < 0)
897 		return HFI_TRANSITION_UNDEFINED;
898 	return physical_state_transitions.allowed[old][new];
899 }
900 
901 static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
902 					  u32 logical_new, u32 physical_new)
903 {
904 	u32 physical_old = driver_physical_state(ppd);
905 	u32 logical_old = driver_logical_state(ppd);
906 	int ret, logical_allowed, physical_allowed;
907 
908 	ret = logical_transition_allowed(logical_old, logical_new);
909 	logical_allowed = ret;
910 
911 	if (ret == HFI_TRANSITION_DISALLOWED ||
912 	    ret == HFI_TRANSITION_UNDEFINED) {
913 		pr_warn("invalid logical state transition %s -> %s\n",
914 			opa_lstate_name(logical_old),
915 			opa_lstate_name(logical_new));
916 		return ret;
917 	}
918 
919 	ret = physical_transition_allowed(physical_old, physical_new);
920 	physical_allowed = ret;
921 
922 	if (ret == HFI_TRANSITION_DISALLOWED ||
923 	    ret == HFI_TRANSITION_UNDEFINED) {
924 		pr_warn("invalid physical state transition %s -> %s\n",
925 			opa_pstate_name(physical_old),
926 			opa_pstate_name(physical_new));
927 		return ret;
928 	}
929 
930 	if (logical_allowed == HFI_TRANSITION_IGNORED &&
931 	    physical_allowed == HFI_TRANSITION_IGNORED)
932 		return HFI_TRANSITION_IGNORED;
933 
934 	/*
935 	 * A change request of Physical Port State from
936 	 * 'Offline' to 'Polling' should be ignored.
937 	 */
938 	if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
939 	    (physical_new == IB_PORTPHYSSTATE_POLLING))
940 		return HFI_TRANSITION_IGNORED;
941 
942 	/*
943 	 * Either physical_allowed or logical_allowed is
944 	 * HFI_TRANSITION_ALLOWED.
945 	 */
946 	return HFI_TRANSITION_ALLOWED;
947 }
948 
/*
 * set_port_states - act on a requested logical/physical port state pair
 * @ppd: port whose link state is to be changed
 * @smp: SM packet being processed; IB_SMP_INVALID_FIELD is OR-ed into
 *       its status when the request is rejected or malformed
 * @logical_state: requested logical port state
 * @phys_state: requested physical port state
 * @suppress_idle_sma: when non-zero, the SMA_IDLE_ARM idle message is not
 *                     sent after a successful move to Armed
 *
 * Returns IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED when the link is
 * being disabled and smp->hop_cnt is non-zero (no reply is to be sent),
 * and 0 in every other case -- including when the request was rejected,
 * ignored, or when set_link_state() reported a failure.
 */
static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
			   u32 logical_state, u32 phys_state,
			   int suppress_idle_sma)
{
	struct hfi1_devdata *dd = ppd->dd;
	u32 link_state;
	int ret;

	/* first check the request against the transition tables */
	ret = port_states_transition_allowed(ppd, logical_state, phys_state);
	if (ret == HFI_TRANSITION_DISALLOWED ||
	    ret == HFI_TRANSITION_UNDEFINED) {
		/* error message emitted above */
		smp->status |= IB_SMP_INVALID_FIELD;
		return 0;
	}

	/* an ignorable request is silently accepted without any action */
	if (ret == HFI_TRANSITION_IGNORED)
		return 0;

	/*
	 * A physical state change is only meaningful together with a
	 * logical state of Down or NOP.  Any other combination is flagged
	 * as invalid, but processing still continues into the switch below.
	 */
	if ((phys_state != IB_PORTPHYSSTATE_NOP) &&
	    !(logical_state == IB_PORT_DOWN ||
	      logical_state == IB_PORT_NOP)){
		pr_warn("SubnSet(OPA_PortInfo) port state invalid: logical_state 0x%x physical_state 0x%x\n",
			logical_state, phys_state);
		smp->status |= IB_SMP_INVALID_FIELD;
	}

	/*
	 * Logical state changes are summarized in OPAv1g1 spec.,
	 * Table 9-12; physical state changes are summarized in
	 * OPAv1g1 spec., Table 6.4.
	 */
	switch (logical_state) {
	case IB_PORT_NOP:
		/* NOP/NOP: nothing requested at all */
		if (phys_state == IB_PORTPHYSSTATE_NOP)
			break;
		/* logical NOP with a physical request is handled like Down */
		/* FALLTHROUGH */
	case IB_PORT_DOWN:
		/* map the requested physical state to a driver link state */
		if (phys_state == IB_PORTPHYSSTATE_NOP) {
			link_state = HLS_DN_DOWNDEF;
		} else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
			link_state = HLS_DN_POLL;
			set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
					     0, OPA_LINKDOWN_REASON_FM_BOUNCE);
		} else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
			link_state = HLS_DN_DISABLE;
		} else {
			pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
				phys_state);
			smp->status |= IB_SMP_INVALID_FIELD;
			break;
		}

		if ((link_state == HLS_DN_POLL ||
		     link_state == HLS_DN_DOWNDEF)) {
			/*
			 * Going to poll.  No matter what the current state,
			 * always move offline first, then tune and start the
			 * link.  This correctly handles a FM link bounce and
			 * a link enable.  Going offline is a no-op if already
			 * offline.
			 */
			set_link_state(ppd, HLS_DN_OFFLINE);
			tune_serdes(ppd);
			start_link(ppd);
		} else {
			/* only HLS_DN_DISABLE reaches this branch */
			set_link_state(ppd, link_state);
		}
		/*
		 * When disabling, record "SMA disabled" as the offline/disabled
		 * reason if the stored reason is "none" or compares greater
		 * than the SMA-disabled value.
		 */
		if (link_state == HLS_DN_DISABLE &&
		    (ppd->offline_disabled_reason >
		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
		     ppd->offline_disabled_reason ==
		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
			ppd->offline_disabled_reason =
			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
		/*
		 * Don't send a reply if the response would be sent
		 * through the disabled port.
		 */
		if (link_state == HLS_DN_DISABLE && smp->hop_cnt)
			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
		break;
	case IB_PORT_ARMED:
		/* the idle SMA is sent only if the state change succeeded */
		ret = set_link_state(ppd, HLS_UP_ARMED);
		if ((ret == 0) && (suppress_idle_sma == 0))
			send_idle_sma(dd, SMA_IDLE_ARM);
		break;
	case IB_PORT_ACTIVE:
		/* moving to Active requires ppd->neighbor_normal to be set */
		if (ppd->neighbor_normal) {
			ret = set_link_state(ppd, HLS_UP_ACTIVE);
			if (ret == 0)
				send_idle_sma(dd, SMA_IDLE_ACTIVE);
		} else {
			pr_warn("SubnSet(OPA_PortInfo) Cannot move to Active with NeighborNormal 0\n");
			smp->status |= IB_SMP_INVALID_FIELD;
		}
		break;
	default:
		pr_warn("SubnSet(OPA_PortInfo) invalid logical state 0x%x\n",
			logical_state);
		smp->status |= IB_SMP_INVALID_FIELD;
	}

	/* a set_link_state() failure above is not propagated to the caller */
	return 0;
}
1054 
1055 /**
1056  * subn_set_opa_portinfo - set port information
1057  * @smp: the incoming SM packet
1058  * @ibdev: the infiniband device
1059  * @port: the port on the device
1060  *
1061  */
1062 static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
1063 				   struct ib_device *ibdev, u8 port,
1064 				   u32 *resp_len)
1065 {
1066 	struct opa_port_info *pi = (struct opa_port_info *)data;
1067 	struct ib_event event;
1068 	struct hfi1_devdata *dd;
1069 	struct hfi1_pportdata *ppd;
1070 	struct hfi1_ibport *ibp;
1071 	u8 clientrereg;
1072 	unsigned long flags;
1073 	u32 smlid, opa_lid; /* tmp vars to hold LID values */
1074 	u16 lid;
1075 	u8 ls_old, ls_new, ps_new;
1076 	u8 vls;
1077 	u8 msl;
1078 	u8 crc_enabled;
1079 	u16 lse, lwe, mtu;
1080 	u32 num_ports = OPA_AM_NPORT(am);
1081 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1082 	int ret, i, invalid = 0, call_set_mtu = 0;
1083 	int call_link_downgrade_policy = 0;
1084 
1085 	if (num_ports != 1) {
1086 		smp->status |= IB_SMP_INVALID_FIELD;
1087 		return reply((struct ib_mad_hdr *)smp);
1088 	}
1089 
1090 	opa_lid = be32_to_cpu(pi->lid);
1091 	if (opa_lid & 0xFFFF0000) {
1092 		pr_warn("OPA_PortInfo lid out of range: %X\n", opa_lid);
1093 		smp->status |= IB_SMP_INVALID_FIELD;
1094 		goto get_only;
1095 	}
1096 
1097 	lid = (u16)(opa_lid & 0x0000FFFF);
1098 
1099 	smlid = be32_to_cpu(pi->sm_lid);
1100 	if (smlid & 0xFFFF0000) {
1101 		pr_warn("OPA_PortInfo SM lid out of range: %X\n", smlid);
1102 		smp->status |= IB_SMP_INVALID_FIELD;
1103 		goto get_only;
1104 	}
1105 	smlid &= 0x0000FFFF;
1106 
1107 	clientrereg = (pi->clientrereg_subnettimeout &
1108 			OPA_PI_MASK_CLIENT_REREGISTER);
1109 
1110 	dd = dd_from_ibdev(ibdev);
1111 	/* IB numbers ports from 1, hw from 0 */
1112 	ppd = dd->pport + (port - 1);
1113 	ibp = &ppd->ibport_data;
1114 	event.device = ibdev;
1115 	event.element.port_num = port;
1116 
1117 	ls_old = driver_lstate(ppd);
1118 
1119 	ibp->rvp.mkey = pi->mkey;
1120 	ibp->rvp.gid_prefix = pi->subnet_prefix;
1121 	ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
1122 
1123 	/* Must be a valid unicast LID address. */
1124 	if ((lid == 0 && ls_old > IB_PORT_INIT) ||
1125 	    lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
1126 		smp->status |= IB_SMP_INVALID_FIELD;
1127 		pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
1128 			lid);
1129 	} else if (ppd->lid != lid ||
1130 		 ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC)) {
1131 		if (ppd->lid != lid)
1132 			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LID_CHANGE_BIT);
1133 		if (ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC))
1134 			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LMC_CHANGE_BIT);
1135 		hfi1_set_lid(ppd, lid, pi->mkeyprotect_lmc & OPA_PI_MASK_LMC);
1136 		event.event = IB_EVENT_LID_CHANGE;
1137 		ib_dispatch_event(&event);
1138 	}
1139 
1140 	msl = pi->smsl & OPA_PI_MASK_SMSL;
1141 	if (pi->partenforce_filterraw & OPA_PI_MASK_LINKINIT_REASON)
1142 		ppd->linkinit_reason =
1143 			(pi->partenforce_filterraw &
1144 			 OPA_PI_MASK_LINKINIT_REASON);
1145 	/* enable/disable SW pkey checking as per FM control */
1146 	if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_IN)
1147 		ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
1148 	else
1149 		ppd->part_enforce &= ~HFI1_PART_ENFORCE_IN;
1150 
1151 	if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_OUT)
1152 		ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
1153 	else
1154 		ppd->part_enforce &= ~HFI1_PART_ENFORCE_OUT;
1155 
1156 	/* Must be a valid unicast LID address. */
1157 	if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
1158 	    smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
1159 		smp->status |= IB_SMP_INVALID_FIELD;
1160 		pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
1161 	} else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
1162 		pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
1163 		spin_lock_irqsave(&ibp->rvp.lock, flags);
1164 		if (ibp->rvp.sm_ah) {
1165 			if (smlid != ibp->rvp.sm_lid)
1166 				ibp->rvp.sm_ah->attr.dlid = smlid;
1167 			if (msl != ibp->rvp.sm_sl)
1168 				ibp->rvp.sm_ah->attr.sl = msl;
1169 		}
1170 		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
1171 		if (smlid != ibp->rvp.sm_lid)
1172 			ibp->rvp.sm_lid = smlid;
1173 		if (msl != ibp->rvp.sm_sl)
1174 			ibp->rvp.sm_sl = msl;
1175 		event.event = IB_EVENT_SM_CHANGE;
1176 		ib_dispatch_event(&event);
1177 	}
1178 
1179 	if (pi->link_down_reason == 0) {
1180 		ppd->local_link_down_reason.sma = 0;
1181 		ppd->local_link_down_reason.latest = 0;
1182 	}
1183 
1184 	if (pi->neigh_link_down_reason == 0) {
1185 		ppd->neigh_link_down_reason.sma = 0;
1186 		ppd->neigh_link_down_reason.latest = 0;
1187 	}
1188 
1189 	ppd->sm_trap_qp = be32_to_cpu(pi->sm_trap_qp);
1190 	ppd->sa_qp = be32_to_cpu(pi->sa_qp);
1191 
1192 	ppd->port_error_action = be32_to_cpu(pi->port_error_action);
1193 	lwe = be16_to_cpu(pi->link_width.enabled);
1194 	if (lwe) {
1195 		if (lwe == OPA_LINK_WIDTH_RESET ||
1196 		    lwe == OPA_LINK_WIDTH_RESET_OLD)
1197 			set_link_width_enabled(ppd, ppd->link_width_supported);
1198 		else if ((lwe & ~ppd->link_width_supported) == 0)
1199 			set_link_width_enabled(ppd, lwe);
1200 		else
1201 			smp->status |= IB_SMP_INVALID_FIELD;
1202 	}
1203 	lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
1204 	/* LWD.E is always applied - 0 means "disabled" */
1205 	if (lwe == OPA_LINK_WIDTH_RESET ||
1206 	    lwe == OPA_LINK_WIDTH_RESET_OLD) {
1207 		set_link_width_downgrade_enabled(ppd,
1208 						 ppd->
1209 						 link_width_downgrade_supported
1210 						 );
1211 	} else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
1212 		/* only set and apply if something changed */
1213 		if (lwe != ppd->link_width_downgrade_enabled) {
1214 			set_link_width_downgrade_enabled(ppd, lwe);
1215 			call_link_downgrade_policy = 1;
1216 		}
1217 	} else {
1218 		smp->status |= IB_SMP_INVALID_FIELD;
1219 	}
1220 	lse = be16_to_cpu(pi->link_speed.enabled);
1221 	if (lse) {
1222 		if (lse & be16_to_cpu(pi->link_speed.supported))
1223 			set_link_speed_enabled(ppd, lse);
1224 		else
1225 			smp->status |= IB_SMP_INVALID_FIELD;
1226 	}
1227 
1228 	ibp->rvp.mkeyprot =
1229 		(pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
1230 	ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
1231 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
1232 				    ibp->rvp.vl_high_limit);
1233 
1234 	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
1235 	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
1236 		smp->status |= IB_SMP_INVALID_FIELD;
1237 		return reply((struct ib_mad_hdr *)smp);
1238 	}
1239 	for (i = 0; i < ppd->vls_supported; i++) {
1240 		if ((i % 2) == 0)
1241 			mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
1242 					   4) & 0xF);
1243 		else
1244 			mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
1245 					  0xF);
1246 		if (mtu == 0xffff) {
1247 			pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
1248 				mtu,
1249 				(pi->neigh_mtu.pvlx_to_mtu[0] >> 4) & 0xF);
1250 			smp->status |= IB_SMP_INVALID_FIELD;
1251 			mtu = hfi1_max_mtu; /* use a valid MTU */
1252 		}
1253 		if (dd->vld[i].mtu != mtu) {
1254 			dd_dev_info(dd,
1255 				    "MTU change on vl %d from %d to %d\n",
1256 				    i, dd->vld[i].mtu, mtu);
1257 			dd->vld[i].mtu = mtu;
1258 			call_set_mtu++;
1259 		}
1260 	}
1261 	/* As per OPAV1 spec: VL15 must support and be configured
1262 	 * for operation with a 2048 or larger MTU.
1263 	 */
1264 	mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
1265 	if (mtu < 2048 || mtu == 0xffff)
1266 		mtu = 2048;
1267 	if (dd->vld[15].mtu != mtu) {
1268 		dd_dev_info(dd,
1269 			    "MTU change on vl 15 from %d to %d\n",
1270 			    dd->vld[15].mtu, mtu);
1271 		dd->vld[15].mtu = mtu;
1272 		call_set_mtu++;
1273 	}
1274 	if (call_set_mtu)
1275 		set_mtu(ppd);
1276 
1277 	/* Set operational VLs */
1278 	vls = pi->operational_vls & OPA_PI_MASK_OPERATIONAL_VL;
1279 	if (vls) {
1280 		if (vls > ppd->vls_supported) {
1281 			pr_warn("SubnSet(OPA_PortInfo) VL's supported invalid %d\n",
1282 				pi->operational_vls);
1283 			smp->status |= IB_SMP_INVALID_FIELD;
1284 		} else {
1285 			if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
1286 					    vls) == -EINVAL)
1287 				smp->status |= IB_SMP_INVALID_FIELD;
1288 		}
1289 	}
1290 
1291 	if (pi->mkey_violations == 0)
1292 		ibp->rvp.mkey_violations = 0;
1293 
1294 	if (pi->pkey_violations == 0)
1295 		ibp->rvp.pkey_violations = 0;
1296 
1297 	if (pi->qkey_violations == 0)
1298 		ibp->rvp.qkey_violations = 0;
1299 
1300 	ibp->rvp.subnet_timeout =
1301 		pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
1302 
1303 	crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
1304 	crc_enabled >>= 4;
1305 	crc_enabled &= 0xf;
1306 
1307 	if (crc_enabled != 0)
1308 		ppd->port_crc_mode_enabled = port_ltp_to_cap(crc_enabled);
1309 
1310 	ppd->is_active_optimize_enabled =
1311 			!!(be16_to_cpu(pi->port_mode)
1312 					& OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE);
1313 
1314 	ls_new = pi->port_states.portphysstate_portstate &
1315 			OPA_PI_MASK_PORT_STATE;
1316 	ps_new = (pi->port_states.portphysstate_portstate &
1317 			OPA_PI_MASK_PORT_PHYSICAL_STATE) >> 4;
1318 
1319 	if (ls_old == IB_PORT_INIT) {
1320 		if (start_of_sm_config) {
1321 			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1322 				ppd->is_sm_config_started = 1;
1323 		} else if (ls_new == IB_PORT_ARMED) {
1324 			if (ppd->is_sm_config_started == 0)
1325 				invalid = 1;
1326 		}
1327 	}
1328 
1329 	/* Handle CLIENT_REREGISTER event b/c SM asked us for it */
1330 	if (clientrereg) {
1331 		event.event = IB_EVENT_CLIENT_REREGISTER;
1332 		ib_dispatch_event(&event);
1333 	}
1334 
1335 	/*
1336 	 * Do the port state change now that the other link parameters
1337 	 * have been set.
1338 	 * Changing the port physical state only makes sense if the link
1339 	 * is down or is being set to down.
1340 	 */
1341 
1342 	ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
1343 	if (ret)
1344 		return ret;
1345 
1346 	ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
1347 
1348 	/* restore re-reg bit per o14-12.2.1 */
1349 	pi->clientrereg_subnettimeout |= clientrereg;
1350 
1351 	/*
1352 	 * Apply the new link downgrade policy.  This may result in a link
1353 	 * bounce.  Do this after everything else so things are settled.
1354 	 * Possible problem: if setting the port state above fails, then
1355 	 * the policy change is not applied.
1356 	 */
1357 	if (call_link_downgrade_policy)
1358 		apply_link_downgrade_policy(ppd, 0);
1359 
1360 	return ret;
1361 
1362 get_only:
1363 	return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
1364 }
1365 
1366 /**
1367  * set_pkeys - set the PKEY table for ctxt 0
1368  * @dd: the hfi1_ib device
1369  * @port: the IB port number
1370  * @pkeys: the PKEY table
1371  */
1372 static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
1373 {
1374 	struct hfi1_pportdata *ppd;
1375 	int i;
1376 	int changed = 0;
1377 	int update_includes_mgmt_partition = 0;
1378 
1379 	/*
1380 	 * IB port one/two always maps to context zero/one,
1381 	 * always a kernel context, no locking needed
1382 	 * If we get here with ppd setup, no need to check
1383 	 * that rcd is valid.
1384 	 */
1385 	ppd = dd->pport + (port - 1);
1386 	/*
1387 	 * If the update does not include the management pkey, don't do it.
1388 	 */
1389 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1390 		if (pkeys[i] == LIM_MGMT_P_KEY) {
1391 			update_includes_mgmt_partition = 1;
1392 			break;
1393 		}
1394 	}
1395 
1396 	if (!update_includes_mgmt_partition)
1397 		return 1;
1398 
1399 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1400 		u16 key = pkeys[i];
1401 		u16 okey = ppd->pkeys[i];
1402 
1403 		if (key == okey)
1404 			continue;
1405 		/*
1406 		 * Don't update pkeys[2], if an HFI port without MgmtAllowed
1407 		 * by neighbor is a switch.
1408 		 */
1409 		if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1)
1410 			continue;
1411 		/*
1412 		 * The SM gives us the complete PKey table. We have
1413 		 * to ensure that we put the PKeys in the matching
1414 		 * slots.
1415 		 */
1416 		ppd->pkeys[i] = key;
1417 		changed = 1;
1418 	}
1419 
1420 	if (changed) {
1421 		struct ib_event event;
1422 
1423 		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
1424 
1425 		event.event = IB_EVENT_PKEY_CHANGE;
1426 		event.device = &dd->verbs_dev.rdi.ibdev;
1427 		event.element.port_num = port;
1428 		ib_dispatch_event(&event);
1429 	}
1430 	return 0;
1431 }
1432 
1433 static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
1434 				    struct ib_device *ibdev, u8 port,
1435 				    u32 *resp_len)
1436 {
1437 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1438 	u32 n_blocks_sent = OPA_AM_NBLK(am);
1439 	u32 start_block = am & 0x7ff;
1440 	u16 *p = (u16 *)data;
1441 	__be16 *q = (__be16 *)data;
1442 	int i;
1443 	u16 n_blocks_avail;
1444 	unsigned npkeys = hfi1_get_npkeys(dd);
1445 
1446 	if (n_blocks_sent == 0) {
1447 		pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
1448 			port, start_block, n_blocks_sent);
1449 		smp->status |= IB_SMP_INVALID_FIELD;
1450 		return reply((struct ib_mad_hdr *)smp);
1451 	}
1452 
1453 	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
1454 
1455 	if (start_block + n_blocks_sent > n_blocks_avail ||
1456 	    n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
1457 		pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
1458 			start_block, n_blocks_sent, n_blocks_avail,
1459 			OPA_NUM_PKEY_BLOCKS_PER_SMP);
1460 		smp->status |= IB_SMP_INVALID_FIELD;
1461 		return reply((struct ib_mad_hdr *)smp);
1462 	}
1463 
1464 	for (i = 0; i < n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE; i++)
1465 		p[i] = be16_to_cpu(q[i]);
1466 
1467 	if (start_block == 0 && set_pkeys(dd, port, p) != 0) {
1468 		smp->status |= IB_SMP_INVALID_FIELD;
1469 		return reply((struct ib_mad_hdr *)smp);
1470 	}
1471 
1472 	return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len);
1473 }
1474 
1475 static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1476 {
1477 	u64 *val = data;
1478 
1479 	*val++ = read_csr(dd, SEND_SC2VLT0);
1480 	*val++ = read_csr(dd, SEND_SC2VLT1);
1481 	*val++ = read_csr(dd, SEND_SC2VLT2);
1482 	*val++ = read_csr(dd, SEND_SC2VLT3);
1483 	return 0;
1484 }
1485 
1486 #define ILLEGAL_VL 12
1487 /*
1488  * filter_sc2vlt changes mappings to VL15 to ILLEGAL_VL (except
1489  * for SC15, which must map to VL15). If we don't remap things this
1490  * way it is possible for VL15 counters to increment when we try to
1491  * send on a SC which is mapped to an invalid VL.
1492  */
1493 static void filter_sc2vlt(void *data)
1494 {
1495 	int i;
1496 	u8 *pd = data;
1497 
1498 	for (i = 0; i < OPA_MAX_SCS; i++) {
1499 		if (i == 15)
1500 			continue;
1501 		if ((pd[i] & 0x1f) == 0xf)
1502 			pd[i] = ILLEGAL_VL;
1503 	}
1504 }
1505 
1506 static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1507 {
1508 	u64 *val = data;
1509 
1510 	filter_sc2vlt(data);
1511 
1512 	write_csr(dd, SEND_SC2VLT0, *val++);
1513 	write_csr(dd, SEND_SC2VLT1, *val++);
1514 	write_csr(dd, SEND_SC2VLT2, *val++);
1515 	write_csr(dd, SEND_SC2VLT3, *val++);
1516 	write_seqlock_irq(&dd->sc2vl_lock);
1517 	memcpy(dd->sc2vl, data, sizeof(dd->sc2vl));
1518 	write_sequnlock_irq(&dd->sc2vl_lock);
1519 	return 0;
1520 }
1521 
1522 static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1523 				   struct ib_device *ibdev, u8 port,
1524 				   u32 *resp_len)
1525 {
1526 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1527 	u8 *p = data;
1528 	size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
1529 	unsigned i;
1530 
1531 	if (am) {
1532 		smp->status |= IB_SMP_INVALID_FIELD;
1533 		return reply((struct ib_mad_hdr *)smp);
1534 	}
1535 
1536 	for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++)
1537 		*p++ = ibp->sl_to_sc[i];
1538 
1539 	if (resp_len)
1540 		*resp_len += size;
1541 
1542 	return reply((struct ib_mad_hdr *)smp);
1543 }
1544 
1545 static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1546 				   struct ib_device *ibdev, u8 port,
1547 				   u32 *resp_len)
1548 {
1549 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1550 	u8 *p = data;
1551 	int i;
1552 	u8 sc;
1553 
1554 	if (am) {
1555 		smp->status |= IB_SMP_INVALID_FIELD;
1556 		return reply((struct ib_mad_hdr *)smp);
1557 	}
1558 
1559 	for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++) {
1560 		sc = *p++;
1561 		if (ibp->sl_to_sc[i] != sc) {
1562 			ibp->sl_to_sc[i] = sc;
1563 
1564 			/* Put all stale qps into error state */
1565 			hfi1_error_port_qps(ibp, i);
1566 		}
1567 	}
1568 
1569 	return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len);
1570 }
1571 
1572 static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1573 				   struct ib_device *ibdev, u8 port,
1574 				   u32 *resp_len)
1575 {
1576 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1577 	u8 *p = data;
1578 	size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
1579 	unsigned i;
1580 
1581 	if (am) {
1582 		smp->status |= IB_SMP_INVALID_FIELD;
1583 		return reply((struct ib_mad_hdr *)smp);
1584 	}
1585 
1586 	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1587 		*p++ = ibp->sc_to_sl[i];
1588 
1589 	if (resp_len)
1590 		*resp_len += size;
1591 
1592 	return reply((struct ib_mad_hdr *)smp);
1593 }
1594 
1595 static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1596 				   struct ib_device *ibdev, u8 port,
1597 				   u32 *resp_len)
1598 {
1599 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1600 	u8 *p = data;
1601 	int i;
1602 
1603 	if (am) {
1604 		smp->status |= IB_SMP_INVALID_FIELD;
1605 		return reply((struct ib_mad_hdr *)smp);
1606 	}
1607 
1608 	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1609 		ibp->sc_to_sl[i] = *p++;
1610 
1611 	return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len);
1612 }
1613 
1614 static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1615 				    struct ib_device *ibdev, u8 port,
1616 				    u32 *resp_len)
1617 {
1618 	u32 n_blocks = OPA_AM_NBLK(am);
1619 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1620 	void *vp = (void *)data;
1621 	size_t size = 4 * sizeof(u64);
1622 
1623 	if (n_blocks != 1) {
1624 		smp->status |= IB_SMP_INVALID_FIELD;
1625 		return reply((struct ib_mad_hdr *)smp);
1626 	}
1627 
1628 	get_sc2vlt_tables(dd, vp);
1629 
1630 	if (resp_len)
1631 		*resp_len += size;
1632 
1633 	return reply((struct ib_mad_hdr *)smp);
1634 }
1635 
1636 static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1637 				    struct ib_device *ibdev, u8 port,
1638 				    u32 *resp_len)
1639 {
1640 	u32 n_blocks = OPA_AM_NBLK(am);
1641 	int async_update = OPA_AM_ASYNC(am);
1642 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1643 	void *vp = (void *)data;
1644 	struct hfi1_pportdata *ppd;
1645 	int lstate;
1646 
1647 	if (n_blocks != 1 || async_update) {
1648 		smp->status |= IB_SMP_INVALID_FIELD;
1649 		return reply((struct ib_mad_hdr *)smp);
1650 	}
1651 
1652 	/* IB numbers ports from 1, hw from 0 */
1653 	ppd = dd->pport + (port - 1);
1654 	lstate = driver_lstate(ppd);
1655 	/*
1656 	 * it's known that async_update is 0 by this point, but include
1657 	 * the explicit check for clarity
1658 	 */
1659 	if (!async_update &&
1660 	    (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
1661 		smp->status |= IB_SMP_INVALID_FIELD;
1662 		return reply((struct ib_mad_hdr *)smp);
1663 	}
1664 
1665 	set_sc2vlt_tables(dd, vp);
1666 
1667 	return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len);
1668 }
1669 
1670 static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1671 				     struct ib_device *ibdev, u8 port,
1672 				     u32 *resp_len)
1673 {
1674 	u32 n_blocks = OPA_AM_NPORT(am);
1675 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1676 	struct hfi1_pportdata *ppd;
1677 	void *vp = (void *)data;
1678 	int size;
1679 
1680 	if (n_blocks != 1) {
1681 		smp->status |= IB_SMP_INVALID_FIELD;
1682 		return reply((struct ib_mad_hdr *)smp);
1683 	}
1684 
1685 	ppd = dd->pport + (port - 1);
1686 
1687 	size = fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
1688 
1689 	if (resp_len)
1690 		*resp_len += size;
1691 
1692 	return reply((struct ib_mad_hdr *)smp);
1693 }
1694 
1695 static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1696 				     struct ib_device *ibdev, u8 port,
1697 				     u32 *resp_len)
1698 {
1699 	u32 n_blocks = OPA_AM_NPORT(am);
1700 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1701 	struct hfi1_pportdata *ppd;
1702 	void *vp = (void *)data;
1703 	int lstate;
1704 
1705 	if (n_blocks != 1) {
1706 		smp->status |= IB_SMP_INVALID_FIELD;
1707 		return reply((struct ib_mad_hdr *)smp);
1708 	}
1709 
1710 	/* IB numbers ports from 1, hw from 0 */
1711 	ppd = dd->pport + (port - 1);
1712 	lstate = driver_lstate(ppd);
1713 	if (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE) {
1714 		smp->status |= IB_SMP_INVALID_FIELD;
1715 		return reply((struct ib_mad_hdr *)smp);
1716 	}
1717 
1718 	ppd = dd->pport + (port - 1);
1719 
1720 	fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
1721 
1722 	return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
1723 					 resp_len);
1724 }
1725 
1726 static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
1727 			      struct ib_device *ibdev, u8 port,
1728 			      u32 *resp_len)
1729 {
1730 	u32 nports = OPA_AM_NPORT(am);
1731 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1732 	u32 lstate;
1733 	struct hfi1_ibport *ibp;
1734 	struct hfi1_pportdata *ppd;
1735 	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
1736 
1737 	if (nports != 1) {
1738 		smp->status |= IB_SMP_INVALID_FIELD;
1739 		return reply((struct ib_mad_hdr *)smp);
1740 	}
1741 
1742 	ibp = to_iport(ibdev, port);
1743 	ppd = ppd_from_ibp(ibp);
1744 
1745 	lstate = driver_lstate(ppd);
1746 
1747 	if (start_of_sm_config && (lstate == IB_PORT_INIT))
1748 		ppd->is_sm_config_started = 1;
1749 
1750 #if PI_LED_ENABLE_SUP
1751 	psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
1752 	psi->port_states.ledenable_offlinereason |=
1753 		ppd->is_sm_config_started << 5;
1754 	psi->port_states.ledenable_offlinereason |=
1755 		ppd->offline_disabled_reason;
1756 #else
1757 	psi->port_states.offline_reason = ppd->neighbor_normal << 4;
1758 	psi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
1759 	psi->port_states.offline_reason |= ppd->offline_disabled_reason;
1760 #endif /* PI_LED_ENABLE_SUP */
1761 
1762 	psi->port_states.portphysstate_portstate =
1763 		(hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf);
1764 	psi->link_width_downgrade_tx_active =
1765 		cpu_to_be16(ppd->link_width_downgrade_tx_active);
1766 	psi->link_width_downgrade_rx_active =
1767 		cpu_to_be16(ppd->link_width_downgrade_rx_active);
1768 	if (resp_len)
1769 		*resp_len += sizeof(struct opa_port_state_info);
1770 
1771 	return reply((struct ib_mad_hdr *)smp);
1772 }
1773 
1774 static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
1775 			      struct ib_device *ibdev, u8 port,
1776 			      u32 *resp_len)
1777 {
1778 	u32 nports = OPA_AM_NPORT(am);
1779 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1780 	u32 ls_old;
1781 	u8 ls_new, ps_new;
1782 	struct hfi1_ibport *ibp;
1783 	struct hfi1_pportdata *ppd;
1784 	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
1785 	int ret, invalid = 0;
1786 
1787 	if (nports != 1) {
1788 		smp->status |= IB_SMP_INVALID_FIELD;
1789 		return reply((struct ib_mad_hdr *)smp);
1790 	}
1791 
1792 	ibp = to_iport(ibdev, port);
1793 	ppd = ppd_from_ibp(ibp);
1794 
1795 	ls_old = driver_lstate(ppd);
1796 
1797 	ls_new = port_states_to_logical_state(&psi->port_states);
1798 	ps_new = port_states_to_phys_state(&psi->port_states);
1799 
1800 	if (ls_old == IB_PORT_INIT) {
1801 		if (start_of_sm_config) {
1802 			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1803 				ppd->is_sm_config_started = 1;
1804 		} else if (ls_new == IB_PORT_ARMED) {
1805 			if (ppd->is_sm_config_started == 0)
1806 				invalid = 1;
1807 		}
1808 	}
1809 
1810 	ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
1811 	if (ret)
1812 		return ret;
1813 
1814 	if (invalid)
1815 		smp->status |= IB_SMP_INVALID_FIELD;
1816 
1817 	return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len);
1818 }
1819 
1820 static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
1821 				     struct ib_device *ibdev, u8 port,
1822 				     u32 *resp_len)
1823 {
1824 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1825 	u32 addr = OPA_AM_CI_ADDR(am);
1826 	u32 len = OPA_AM_CI_LEN(am) + 1;
1827 	int ret;
1828 
1829 #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
1830 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
1831 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
1832 
1833 	/*
1834 	 * check that addr is within spec, and
1835 	 * addr and (addr + len - 1) are on the same "page"
1836 	 */
1837 	if (addr >= 4096 ||
1838 	    (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
1839 		smp->status |= IB_SMP_INVALID_FIELD;
1840 		return reply((struct ib_mad_hdr *)smp);
1841 	}
1842 
1843 	ret = get_cable_info(dd, port, addr, len, data);
1844 
1845 	if (ret == -ENODEV) {
1846 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
1847 		return reply((struct ib_mad_hdr *)smp);
1848 	}
1849 
1850 	/* The address range for the CableInfo SMA query is wider than the
1851 	 * memory available on the QSFP cable. We want to return a valid
1852 	 * response, albeit zeroed out, for address ranges beyond available
1853 	 * memory but that are within the CableInfo query spec
1854 	 */
1855 	if (ret < 0 && ret != -ERANGE) {
1856 		smp->status |= IB_SMP_INVALID_FIELD;
1857 		return reply((struct ib_mad_hdr *)smp);
1858 	}
1859 
1860 	if (resp_len)
1861 		*resp_len += len;
1862 
1863 	return reply((struct ib_mad_hdr *)smp);
1864 }
1865 
1866 static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
1867 			      struct ib_device *ibdev, u8 port, u32 *resp_len)
1868 {
1869 	u32 num_ports = OPA_AM_NPORT(am);
1870 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1871 	struct hfi1_pportdata *ppd;
1872 	struct buffer_control *p = (struct buffer_control *)data;
1873 	int size;
1874 
1875 	if (num_ports != 1) {
1876 		smp->status |= IB_SMP_INVALID_FIELD;
1877 		return reply((struct ib_mad_hdr *)smp);
1878 	}
1879 
1880 	ppd = dd->pport + (port - 1);
1881 	size = fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
1882 	trace_bct_get(dd, p);
1883 	if (resp_len)
1884 		*resp_len += size;
1885 
1886 	return reply((struct ib_mad_hdr *)smp);
1887 }
1888 
1889 static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
1890 			      struct ib_device *ibdev, u8 port, u32 *resp_len)
1891 {
1892 	u32 num_ports = OPA_AM_NPORT(am);
1893 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1894 	struct hfi1_pportdata *ppd;
1895 	struct buffer_control *p = (struct buffer_control *)data;
1896 
1897 	if (num_ports != 1) {
1898 		smp->status |= IB_SMP_INVALID_FIELD;
1899 		return reply((struct ib_mad_hdr *)smp);
1900 	}
1901 	ppd = dd->pport + (port - 1);
1902 	trace_bct_set(dd, p);
1903 	if (fm_set_table(ppd, FM_TBL_BUFFER_CONTROL, p) < 0) {
1904 		smp->status |= IB_SMP_INVALID_FIELD;
1905 		return reply((struct ib_mad_hdr *)smp);
1906 	}
1907 
1908 	return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len);
1909 }
1910 
1911 static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
1912 				 struct ib_device *ibdev, u8 port,
1913 				 u32 *resp_len)
1914 {
1915 	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
1916 	u32 num_ports = OPA_AM_NPORT(am);
1917 	u8 section = (am & 0x00ff0000) >> 16;
1918 	u8 *p = data;
1919 	int size = 0;
1920 
1921 	if (num_ports != 1) {
1922 		smp->status |= IB_SMP_INVALID_FIELD;
1923 		return reply((struct ib_mad_hdr *)smp);
1924 	}
1925 
1926 	switch (section) {
1927 	case OPA_VLARB_LOW_ELEMENTS:
1928 		size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
1929 		break;
1930 	case OPA_VLARB_HIGH_ELEMENTS:
1931 		size = fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
1932 		break;
1933 	case OPA_VLARB_PREEMPT_ELEMENTS:
1934 		size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
1935 		break;
1936 	case OPA_VLARB_PREEMPT_MATRIX:
1937 		size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
1938 		break;
1939 	default:
1940 		pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
1941 			be32_to_cpu(smp->attr_mod));
1942 		smp->status |= IB_SMP_INVALID_FIELD;
1943 		break;
1944 	}
1945 
1946 	if (size > 0 && resp_len)
1947 		*resp_len += size;
1948 
1949 	return reply((struct ib_mad_hdr *)smp);
1950 }
1951 
1952 static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
1953 				 struct ib_device *ibdev, u8 port,
1954 				 u32 *resp_len)
1955 {
1956 	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
1957 	u32 num_ports = OPA_AM_NPORT(am);
1958 	u8 section = (am & 0x00ff0000) >> 16;
1959 	u8 *p = data;
1960 
1961 	if (num_ports != 1) {
1962 		smp->status |= IB_SMP_INVALID_FIELD;
1963 		return reply((struct ib_mad_hdr *)smp);
1964 	}
1965 
1966 	switch (section) {
1967 	case OPA_VLARB_LOW_ELEMENTS:
1968 		(void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
1969 		break;
1970 	case OPA_VLARB_HIGH_ELEMENTS:
1971 		(void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
1972 		break;
1973 	/*
1974 	 * neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX
1975 	 * can be changed from the default values
1976 	 */
1977 	case OPA_VLARB_PREEMPT_ELEMENTS:
1978 		/* FALLTHROUGH */
1979 	case OPA_VLARB_PREEMPT_MATRIX:
1980 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
1981 		break;
1982 	default:
1983 		pr_warn("OPA SubnSet(VL Arb) AM Invalid : 0x%x\n",
1984 			be32_to_cpu(smp->attr_mod));
1985 		smp->status |= IB_SMP_INVALID_FIELD;
1986 		break;
1987 	}
1988 
1989 	return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len);
1990 }
1991 
1992 struct opa_pma_mad {
1993 	struct ib_mad_hdr mad_hdr;
1994 	u8 data[2024];
1995 } __packed;
1996 
1997 struct opa_class_port_info {
1998 	u8 base_version;
1999 	u8 class_version;
2000 	__be16 cap_mask;
2001 	__be32 cap_mask2_resp_time;
2002 
2003 	u8 redirect_gid[16];
2004 	__be32 redirect_tc_fl;
2005 	__be32 redirect_lid;
2006 	__be32 redirect_sl_qp;
2007 	__be32 redirect_qkey;
2008 
2009 	u8 trap_gid[16];
2010 	__be32 trap_tc_fl;
2011 	__be32 trap_lid;
2012 	__be32 trap_hl_qp;
2013 	__be32 trap_qkey;
2014 
2015 	__be16 trap_pkey;
2016 	__be16 redirect_pkey;
2017 
2018 	u8 trap_sl_rsvd;
2019 	u8 reserved[3];
2020 } __packed;
2021 
2022 struct opa_port_status_req {
2023 	__u8 port_num;
2024 	__u8 reserved[3];
2025 	__be32 vl_select_mask;
2026 };
2027 
/* Every VL bit a vl_select_mask may carry: bits 0-7 and bit 15. */
#define VL_MASK_ALL		0x000080ff
2029 
2030 struct opa_port_status_rsp {
2031 	__u8 port_num;
2032 	__u8 reserved[3];
2033 	__be32  vl_select_mask;
2034 
2035 	/* Data counters */
2036 	__be64 port_xmit_data;
2037 	__be64 port_rcv_data;
2038 	__be64 port_xmit_pkts;
2039 	__be64 port_rcv_pkts;
2040 	__be64 port_multicast_xmit_pkts;
2041 	__be64 port_multicast_rcv_pkts;
2042 	__be64 port_xmit_wait;
2043 	__be64 sw_port_congestion;
2044 	__be64 port_rcv_fecn;
2045 	__be64 port_rcv_becn;
2046 	__be64 port_xmit_time_cong;
2047 	__be64 port_xmit_wasted_bw;
2048 	__be64 port_xmit_wait_data;
2049 	__be64 port_rcv_bubble;
2050 	__be64 port_mark_fecn;
2051 	/* Error counters */
2052 	__be64 port_rcv_constraint_errors;
2053 	__be64 port_rcv_switch_relay_errors;
2054 	__be64 port_xmit_discards;
2055 	__be64 port_xmit_constraint_errors;
2056 	__be64 port_rcv_remote_physical_errors;
2057 	__be64 local_link_integrity_errors;
2058 	__be64 port_rcv_errors;
2059 	__be64 excessive_buffer_overruns;
2060 	__be64 fm_config_errors;
2061 	__be32 link_error_recovery;
2062 	__be32 link_downed;
2063 	u8 uncorrectable_errors;
2064 
2065 	u8 link_quality_indicator; /* 5res, 3bit */
2066 	u8 res2[6];
2067 	struct _vls_pctrs {
2068 		/* per-VL Data counters */
2069 		__be64 port_vl_xmit_data;
2070 		__be64 port_vl_rcv_data;
2071 		__be64 port_vl_xmit_pkts;
2072 		__be64 port_vl_rcv_pkts;
2073 		__be64 port_vl_xmit_wait;
2074 		__be64 sw_port_vl_congestion;
2075 		__be64 port_vl_rcv_fecn;
2076 		__be64 port_vl_rcv_becn;
2077 		__be64 port_xmit_time_cong;
2078 		__be64 port_vl_xmit_wasted_bw;
2079 		__be64 port_vl_xmit_wait_data;
2080 		__be64 port_vl_rcv_bubble;
2081 		__be64 port_vl_mark_fecn;
2082 		__be64 port_vl_xmit_discards;
2083 	} vls[0]; /* real array size defined by # bits set in vl_select_mask */
2084 };
2085 
/*
 * Counter selection bits, assigned from bit 31 downwards in the same
 * order as the counters appear in struct opa_port_status_rsp.
 * NOTE(review): presumably tested against
 * opa_clear_port_status.counter_select_mask; the user is not visible in
 * this part of the file.
 */
enum counter_selects {
	/* data counters */
	CS_PORT_XMIT_DATA			= (1 << 31),
	CS_PORT_RCV_DATA			= (1 << 30),
	CS_PORT_XMIT_PKTS			= (1 << 29),
	CS_PORT_RCV_PKTS			= (1 << 28),
	CS_PORT_MCAST_XMIT_PKTS			= (1 << 27),
	CS_PORT_MCAST_RCV_PKTS			= (1 << 26),
	CS_PORT_XMIT_WAIT			= (1 << 25),
	CS_SW_PORT_CONGESTION			= (1 << 24),
	CS_PORT_RCV_FECN			= (1 << 23),
	CS_PORT_RCV_BECN			= (1 << 22),
	CS_PORT_XMIT_TIME_CONG			= (1 << 21),
	CS_PORT_XMIT_WASTED_BW			= (1 << 20),
	CS_PORT_XMIT_WAIT_DATA			= (1 << 19),
	CS_PORT_RCV_BUBBLE			= (1 << 18),
	CS_PORT_MARK_FECN			= (1 << 17),
	/* error counters */
	CS_PORT_RCV_CONSTRAINT_ERRORS		= (1 << 16),
	CS_PORT_RCV_SWITCH_RELAY_ERRORS		= (1 << 15),
	CS_PORT_XMIT_DISCARDS			= (1 << 14),
	CS_PORT_XMIT_CONSTRAINT_ERRORS		= (1 << 13),
	CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS	= (1 << 12),
	CS_LOCAL_LINK_INTEGRITY_ERRORS		= (1 << 11),
	CS_PORT_RCV_ERRORS			= (1 << 10),
	CS_EXCESSIVE_BUFFER_OVERRUNS		= (1 << 9),
	CS_FM_CONFIG_ERRORS			= (1 << 8),
	CS_LINK_ERROR_RECOVERY			= (1 << 7),
	CS_LINK_DOWNED				= (1 << 6),
	CS_UNCORRECTABLE_ERRORS			= (1 << 5),
};
2115 
2116 struct opa_clear_port_status {
2117 	__be64 port_select_mask[4];
2118 	__be32 counter_select_mask;
2119 };
2120 
2121 struct opa_aggregate {
2122 	__be16 attr_id;
2123 	__be16 err_reqlength;	/* 1 bit, 8 res, 7 bit */
2124 	__be32 attr_mod;
2125 	u8 data[0];
2126 };
2127 
/*
 * Layout of the 'resolution' word of a data counters request:
 * bits 7:4 hold the local link integrity (LLI) resolution and bits 3:0
 * the link error recovery (LER) resolution.  A non-zero resolution has
 * ADD_LLI / ADD_LER added to it to form the right-shift applied to the
 * corresponding counter in the error counter summary.
 */
#define MSK_LLI 0x000000f0
#define MSK_LLI_SFT 4
#define MSK_LER 0x0000000f
#define MSK_LER_SFT 0
#define ADD_LLI 8
#define ADD_LER 2
2134 
2135 /* Request contains first three fields, response contains those plus the rest */
2136 struct opa_port_data_counters_msg {
2137 	__be64 port_select_mask[4];
2138 	__be32 vl_select_mask;
2139 	__be32 resolution;
2140 
2141 	/* Response fields follow */
2142 	struct _port_dctrs {
2143 		u8 port_number;
2144 		u8 reserved2[3];
2145 		__be32 link_quality_indicator; /* 29res, 3bit */
2146 
2147 		/* Data counters */
2148 		__be64 port_xmit_data;
2149 		__be64 port_rcv_data;
2150 		__be64 port_xmit_pkts;
2151 		__be64 port_rcv_pkts;
2152 		__be64 port_multicast_xmit_pkts;
2153 		__be64 port_multicast_rcv_pkts;
2154 		__be64 port_xmit_wait;
2155 		__be64 sw_port_congestion;
2156 		__be64 port_rcv_fecn;
2157 		__be64 port_rcv_becn;
2158 		__be64 port_xmit_time_cong;
2159 		__be64 port_xmit_wasted_bw;
2160 		__be64 port_xmit_wait_data;
2161 		__be64 port_rcv_bubble;
2162 		__be64 port_mark_fecn;
2163 
2164 		__be64 port_error_counter_summary;
2165 		/* Sum of error counts/port */
2166 
2167 		struct _vls_dctrs {
2168 			/* per-VL Data counters */
2169 			__be64 port_vl_xmit_data;
2170 			__be64 port_vl_rcv_data;
2171 			__be64 port_vl_xmit_pkts;
2172 			__be64 port_vl_rcv_pkts;
2173 			__be64 port_vl_xmit_wait;
2174 			__be64 sw_port_vl_congestion;
2175 			__be64 port_vl_rcv_fecn;
2176 			__be64 port_vl_rcv_becn;
2177 			__be64 port_xmit_time_cong;
2178 			__be64 port_vl_xmit_wasted_bw;
2179 			__be64 port_vl_xmit_wait_data;
2180 			__be64 port_vl_rcv_bubble;
2181 			__be64 port_vl_mark_fecn;
2182 		} vls[0];
2183 		/* array size defined by #bits set in vl_select_mask*/
2184 	} port[1]; /* array size defined by  #ports in attribute modifier */
2185 };
2186 
2187 struct opa_port_error_counters64_msg {
2188 	/*
2189 	 * Request contains first two fields, response contains the
2190 	 * whole magilla
2191 	 */
2192 	__be64 port_select_mask[4];
2193 	__be32 vl_select_mask;
2194 
2195 	/* Response-only fields follow */
2196 	__be32 reserved1;
2197 	struct _port_ectrs {
2198 		u8 port_number;
2199 		u8 reserved2[7];
2200 		__be64 port_rcv_constraint_errors;
2201 		__be64 port_rcv_switch_relay_errors;
2202 		__be64 port_xmit_discards;
2203 		__be64 port_xmit_constraint_errors;
2204 		__be64 port_rcv_remote_physical_errors;
2205 		__be64 local_link_integrity_errors;
2206 		__be64 port_rcv_errors;
2207 		__be64 excessive_buffer_overruns;
2208 		__be64 fm_config_errors;
2209 		__be32 link_error_recovery;
2210 		__be32 link_downed;
2211 		u8 uncorrectable_errors;
2212 		u8 reserved3[7];
2213 		struct _vls_ectrs {
2214 			__be64 port_vl_xmit_discards;
2215 		} vls[0];
2216 		/* array size defined by #bits set in vl_select_mask */
2217 	} port[1]; /* array size defined by #ports in attribute modifier */
2218 };
2219 
2220 struct opa_port_error_info_msg {
2221 	__be64 port_select_mask[4];
2222 	__be32 error_info_select_mask;
2223 	__be32 reserved1;
2224 	struct _port_ei {
2225 		u8 port_number;
2226 		u8 reserved2[7];
2227 
2228 		/* PortRcvErrorInfo */
2229 		struct {
2230 			u8 status_and_code;
2231 			union {
2232 				u8 raw[17];
2233 				struct {
2234 					/* EI1to12 format */
2235 					u8 packet_flit1[8];
2236 					u8 packet_flit2[8];
2237 					u8 remaining_flit_bits12;
2238 				} ei1to12;
2239 				struct {
2240 					u8 packet_bytes[8];
2241 					u8 remaining_flit_bits;
2242 				} ei13;
2243 			} ei;
2244 			u8 reserved3[6];
2245 		} __packed port_rcv_ei;
2246 
2247 		/* ExcessiveBufferOverrunInfo */
2248 		struct {
2249 			u8 status_and_sc;
2250 			u8 reserved4[7];
2251 		} __packed excessive_buffer_overrun_ei;
2252 
2253 		/* PortXmitConstraintErrorInfo */
2254 		struct {
2255 			u8 status;
2256 			u8 reserved5;
2257 			__be16 pkey;
2258 			__be32 slid;
2259 		} __packed port_xmit_constraint_ei;
2260 
2261 		/* PortRcvConstraintErrorInfo */
2262 		struct {
2263 			u8 status;
2264 			u8 reserved6;
2265 			__be16 pkey;
2266 			__be32 slid;
2267 		} __packed port_rcv_constraint_ei;
2268 
2269 		/* PortRcvSwitchRelayErrorInfo */
2270 		struct {
2271 			u8 status_and_code;
2272 			u8 reserved7[3];
2273 			__u32 error_info;
2274 		} __packed port_rcv_switch_relay_ei;
2275 
2276 		/* UncorrectableErrorInfo */
2277 		struct {
2278 			u8 status_and_code;
2279 			u8 reserved8;
2280 		} __packed uncorrectable_ei;
2281 
2282 		/* FMConfigErrorInfo */
2283 		struct {
2284 			u8 status_and_code;
2285 			u8 error_info;
2286 		} __packed fm_config_ei;
2287 		__u32 reserved9;
2288 	} port[1]; /* actual array size defined by #ports in attr modifier */
2289 };
2290 
/*
 * Bit definitions for opa_port_error_info_msg.error_info_select_mask,
 * assigned from bit 31 downwards in the same order as the records in
 * struct _port_ei.
 */
enum error_info_selects {
	ES_PORT_RCV_ERROR_INFO			= (1 << 31),
	ES_EXCESSIVE_BUFFER_OVERRUN_INFO	= (1 << 30),
	ES_PORT_XMIT_CONSTRAINT_ERROR_INFO	= (1 << 29),
	ES_PORT_RCV_CONSTRAINT_ERROR_INFO	= (1 << 28),
	ES_PORT_RCV_SWITCH_RELAY_ERROR_INFO	= (1 << 27),
	ES_UNCORRECTABLE_ERROR_INFO		= (1 << 26),
	ES_FM_CONFIG_ERROR_INFO			= (1 << 25)
};
2301 
2302 static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
2303 				     struct ib_device *ibdev, u32 *resp_len)
2304 {
2305 	struct opa_class_port_info *p =
2306 		(struct opa_class_port_info *)pmp->data;
2307 
2308 	memset(pmp->data, 0, sizeof(pmp->data));
2309 
2310 	if (pmp->mad_hdr.attr_mod != 0)
2311 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2312 
2313 	p->base_version = OPA_MGMT_BASE_VERSION;
2314 	p->class_version = OPA_SMI_CLASS_VERSION;
2315 	/*
2316 	 * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
2317 	 */
2318 	p->cap_mask2_resp_time = cpu_to_be32(18);
2319 
2320 	if (resp_len)
2321 		*resp_len += sizeof(*p);
2322 
2323 	return reply((struct ib_mad_hdr *)pmp);
2324 }
2325 
2326 static void a0_portstatus(struct hfi1_pportdata *ppd,
2327 			  struct opa_port_status_rsp *rsp, u32 vl_select_mask)
2328 {
2329 	if (!is_bx(ppd->dd)) {
2330 		unsigned long vl;
2331 		u64 sum_vl_xmit_wait = 0;
2332 		u32 vl_all_mask = VL_MASK_ALL;
2333 
2334 		for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
2335 				 8 * sizeof(vl_all_mask)) {
2336 			u64 tmp = sum_vl_xmit_wait +
2337 				  read_port_cntr(ppd, C_TX_WAIT_VL,
2338 						 idx_from_vl(vl));
2339 			if (tmp < sum_vl_xmit_wait) {
2340 				/* we wrapped */
2341 				sum_vl_xmit_wait = (u64)~0;
2342 				break;
2343 			}
2344 			sum_vl_xmit_wait = tmp;
2345 		}
2346 		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2347 			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2348 	}
2349 }
2350 
2351 static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
2352 				  struct ib_device *ibdev,
2353 				  u8 port, u32 *resp_len)
2354 {
2355 	struct opa_port_status_req *req =
2356 		(struct opa_port_status_req *)pmp->data;
2357 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2358 	struct opa_port_status_rsp *rsp;
2359 	u32 vl_select_mask = be32_to_cpu(req->vl_select_mask);
2360 	unsigned long vl;
2361 	size_t response_data_size;
2362 	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2363 	u8 port_num = req->port_num;
2364 	u8 num_vls = hweight32(vl_select_mask);
2365 	struct _vls_pctrs *vlinfo;
2366 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2367 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2368 	int vfi;
2369 	u64 tmp, tmp2;
2370 
2371 	response_data_size = sizeof(struct opa_port_status_rsp) +
2372 				num_vls * sizeof(struct _vls_pctrs);
2373 	if (response_data_size > sizeof(pmp->data)) {
2374 		pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE;
2375 		return reply((struct ib_mad_hdr *)pmp);
2376 	}
2377 
2378 	if (nports != 1 || (port_num && port_num != port) ||
2379 	    num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
2380 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2381 		return reply((struct ib_mad_hdr *)pmp);
2382 	}
2383 
2384 	memset(pmp->data, 0, sizeof(pmp->data));
2385 
2386 	rsp = (struct opa_port_status_rsp *)pmp->data;
2387 	if (port_num)
2388 		rsp->port_num = port_num;
2389 	else
2390 		rsp->port_num = port;
2391 
2392 	rsp->port_rcv_constraint_errors =
2393 		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2394 					   CNTR_INVALID_VL));
2395 
2396 	hfi1_read_link_quality(dd, &rsp->link_quality_indicator);
2397 
2398 	rsp->vl_select_mask = cpu_to_be32(vl_select_mask);
2399 	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2400 					  CNTR_INVALID_VL));
2401 	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2402 					 CNTR_INVALID_VL));
2403 	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2404 					  CNTR_INVALID_VL));
2405 	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2406 					 CNTR_INVALID_VL));
2407 	rsp->port_multicast_xmit_pkts =
2408 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2409 					  CNTR_INVALID_VL));
2410 	rsp->port_multicast_rcv_pkts =
2411 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2412 					  CNTR_INVALID_VL));
2413 	rsp->port_xmit_wait =
2414 		cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2415 	rsp->port_rcv_fecn =
2416 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2417 	rsp->port_rcv_becn =
2418 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2419 	rsp->port_xmit_discards =
2420 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2421 					   CNTR_INVALID_VL));
2422 	rsp->port_xmit_constraint_errors =
2423 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2424 					   CNTR_INVALID_VL));
2425 	rsp->port_rcv_remote_physical_errors =
2426 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2427 					  CNTR_INVALID_VL));
2428 	tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
2429 	tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
2430 	if (tmp2 < tmp) {
2431 		/* overflow/wrapped */
2432 		rsp->local_link_integrity_errors = cpu_to_be64(~0);
2433 	} else {
2434 		rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
2435 	}
2436 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2437 	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2438 				   CNTR_INVALID_VL);
2439 	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
2440 		/* overflow/wrapped */
2441 		rsp->link_error_recovery = cpu_to_be32(~0);
2442 	} else {
2443 		rsp->link_error_recovery = cpu_to_be32(tmp2);
2444 	}
2445 	rsp->port_rcv_errors =
2446 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2447 	rsp->excessive_buffer_overruns =
2448 		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2449 	rsp->fm_config_errors =
2450 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2451 					  CNTR_INVALID_VL));
2452 	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2453 						      CNTR_INVALID_VL));
2454 
2455 	/* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
2456 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2457 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2458 
2459 	vlinfo = &rsp->vls[0];
2460 	vfi = 0;
2461 	/* The vl_select_mask has been checked above, and we know
2462 	 * that it contains only entries which represent valid VLs.
2463 	 * So in the for_each_set_bit() loop below, we don't need
2464 	 * any additional checks for vl.
2465 	 */
2466 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2467 			 8 * sizeof(vl_select_mask)) {
2468 		memset(vlinfo, 0, sizeof(*vlinfo));
2469 
2470 		tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
2471 		rsp->vls[vfi].port_vl_rcv_data = cpu_to_be64(tmp);
2472 
2473 		rsp->vls[vfi].port_vl_rcv_pkts =
2474 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2475 						  idx_from_vl(vl)));
2476 
2477 		rsp->vls[vfi].port_vl_xmit_data =
2478 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2479 						   idx_from_vl(vl)));
2480 
2481 		rsp->vls[vfi].port_vl_xmit_pkts =
2482 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2483 						   idx_from_vl(vl)));
2484 
2485 		rsp->vls[vfi].port_vl_xmit_wait =
2486 			cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2487 						   idx_from_vl(vl)));
2488 
2489 		rsp->vls[vfi].port_vl_rcv_fecn =
2490 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2491 						  idx_from_vl(vl)));
2492 
2493 		rsp->vls[vfi].port_vl_rcv_becn =
2494 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2495 						  idx_from_vl(vl)));
2496 
2497 		vlinfo++;
2498 		vfi++;
2499 	}
2500 
2501 	a0_portstatus(ppd, rsp, vl_select_mask);
2502 
2503 	if (resp_len)
2504 		*resp_len += response_data_size;
2505 
2506 	return reply((struct ib_mad_hdr *)pmp);
2507 }
2508 
2509 static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
2510 				     u8 res_lli, u8 res_ler)
2511 {
2512 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2513 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2514 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2515 	u64 error_counter_summary = 0, tmp;
2516 
2517 	error_counter_summary += read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2518 						CNTR_INVALID_VL);
2519 	/* port_rcv_switch_relay_errors is 0 for HFIs */
2520 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_DSCD,
2521 						CNTR_INVALID_VL);
2522 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2523 						CNTR_INVALID_VL);
2524 	error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2525 					       CNTR_INVALID_VL);
2526 	/* local link integrity must be right-shifted by the lli resolution */
2527 	tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
2528 	tmp += read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
2529 	error_counter_summary += (tmp >> res_lli);
2530 	/* link error recovery must b right-shifted by the ler resolution */
2531 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2532 	tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
2533 	error_counter_summary += (tmp >> res_ler);
2534 	error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
2535 					       CNTR_INVALID_VL);
2536 	error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2537 	error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2538 					       CNTR_INVALID_VL);
2539 	/* ppd->link_downed is a 32-bit value */
2540 	error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
2541 						CNTR_INVALID_VL);
2542 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2543 	/* this is an 8-bit quantity */
2544 	error_counter_summary += tmp < 0x100 ? (tmp & 0xff) : 0xff;
2545 
2546 	return error_counter_summary;
2547 }
2548 
2549 static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
2550 			    u32 vl_select_mask)
2551 {
2552 	if (!is_bx(ppd->dd)) {
2553 		unsigned long vl;
2554 		u64 sum_vl_xmit_wait = 0;
2555 		u32 vl_all_mask = VL_MASK_ALL;
2556 
2557 		for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
2558 				 8 * sizeof(vl_all_mask)) {
2559 			u64 tmp = sum_vl_xmit_wait +
2560 				  read_port_cntr(ppd, C_TX_WAIT_VL,
2561 						 idx_from_vl(vl));
2562 			if (tmp < sum_vl_xmit_wait) {
2563 				/* we wrapped */
2564 				sum_vl_xmit_wait = (u64)~0;
2565 				break;
2566 			}
2567 			sum_vl_xmit_wait = tmp;
2568 		}
2569 		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2570 			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2571 	}
2572 }
2573 
2574 static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
2575 				   struct _port_dctrs *rsp)
2576 {
2577 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2578 
2579 	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2580 						CNTR_INVALID_VL));
2581 	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2582 						CNTR_INVALID_VL));
2583 	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2584 						CNTR_INVALID_VL));
2585 	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2586 						CNTR_INVALID_VL));
2587 	rsp->port_multicast_xmit_pkts =
2588 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2589 					  CNTR_INVALID_VL));
2590 	rsp->port_multicast_rcv_pkts =
2591 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2592 					  CNTR_INVALID_VL));
2593 }
2594 
2595 static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
2596 				    struct ib_device *ibdev,
2597 				    u8 port, u32 *resp_len)
2598 {
2599 	struct opa_port_data_counters_msg *req =
2600 		(struct opa_port_data_counters_msg *)pmp->data;
2601 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2602 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2603 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2604 	struct _port_dctrs *rsp;
2605 	struct _vls_dctrs *vlinfo;
2606 	size_t response_data_size;
2607 	u32 num_ports;
2608 	u8 num_pslm;
2609 	u8 lq, num_vls;
2610 	u8 res_lli, res_ler;
2611 	u64 port_mask;
2612 	unsigned long port_num;
2613 	unsigned long vl;
2614 	u32 vl_select_mask;
2615 	int vfi;
2616 
2617 	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2618 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2619 	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2620 	vl_select_mask = be32_to_cpu(req->vl_select_mask);
2621 	res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT;
2622 	res_lli = res_lli ? res_lli + ADD_LLI : 0;
2623 	res_ler = (u8)(be32_to_cpu(req->resolution) & MSK_LER) >> MSK_LER_SFT;
2624 	res_ler = res_ler ? res_ler + ADD_LER : 0;
2625 
2626 	if (num_ports != 1 || (vl_select_mask & ~VL_MASK_ALL)) {
2627 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2628 		return reply((struct ib_mad_hdr *)pmp);
2629 	}
2630 
2631 	/* Sanity check */
2632 	response_data_size = sizeof(struct opa_port_data_counters_msg) +
2633 				num_vls * sizeof(struct _vls_dctrs);
2634 
2635 	if (response_data_size > sizeof(pmp->data)) {
2636 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2637 		return reply((struct ib_mad_hdr *)pmp);
2638 	}
2639 
2640 	/*
2641 	 * The bit set in the mask needs to be consistent with the
2642 	 * port the request came in on.
2643 	 */
2644 	port_mask = be64_to_cpu(req->port_select_mask[3]);
2645 	port_num = find_first_bit((unsigned long *)&port_mask,
2646 				  sizeof(port_mask));
2647 
2648 	if ((u8)port_num != port) {
2649 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2650 		return reply((struct ib_mad_hdr *)pmp);
2651 	}
2652 
2653 	rsp = &req->port[0];
2654 	memset(rsp, 0, sizeof(*rsp));
2655 
2656 	rsp->port_number = port;
2657 	/*
2658 	 * Note that link_quality_indicator is a 32 bit quantity in
2659 	 * 'datacounters' queries (as opposed to 'portinfo' queries,
2660 	 * where it's a byte).
2661 	 */
2662 	hfi1_read_link_quality(dd, &lq);
2663 	rsp->link_quality_indicator = cpu_to_be32((u32)lq);
2664 	pma_get_opa_port_dctrs(ibdev, rsp);
2665 
2666 	rsp->port_xmit_wait =
2667 		cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2668 	rsp->port_rcv_fecn =
2669 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2670 	rsp->port_rcv_becn =
2671 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2672 	rsp->port_error_counter_summary =
2673 		cpu_to_be64(get_error_counter_summary(ibdev, port,
2674 						      res_lli, res_ler));
2675 
2676 	vlinfo = &rsp->vls[0];
2677 	vfi = 0;
2678 	/* The vl_select_mask has been checked above, and we know
2679 	 * that it contains only entries which represent valid VLs.
2680 	 * So in the for_each_set_bit() loop below, we don't need
2681 	 * any additional checks for vl.
2682 	 */
2683 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2684 			 8 * sizeof(req->vl_select_mask)) {
2685 		memset(vlinfo, 0, sizeof(*vlinfo));
2686 
2687 		rsp->vls[vfi].port_vl_xmit_data =
2688 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2689 						   idx_from_vl(vl)));
2690 
2691 		rsp->vls[vfi].port_vl_rcv_data =
2692 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
2693 						  idx_from_vl(vl)));
2694 
2695 		rsp->vls[vfi].port_vl_xmit_pkts =
2696 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2697 						   idx_from_vl(vl)));
2698 
2699 		rsp->vls[vfi].port_vl_rcv_pkts =
2700 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2701 						  idx_from_vl(vl)));
2702 
2703 		rsp->vls[vfi].port_vl_xmit_wait =
2704 			cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2705 						   idx_from_vl(vl)));
2706 
2707 		rsp->vls[vfi].port_vl_rcv_fecn =
2708 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2709 						  idx_from_vl(vl)));
2710 		rsp->vls[vfi].port_vl_rcv_becn =
2711 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2712 						  idx_from_vl(vl)));
2713 
2714 		/* rsp->port_vl_xmit_time_cong is 0 for HFIs */
2715 		/* rsp->port_vl_xmit_wasted_bw ??? */
2716 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
2717 		 * does this differ from rsp->vls[vfi].port_vl_xmit_wait
2718 		 */
2719 		/*rsp->vls[vfi].port_vl_mark_fecn =
2720 		 *	cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
2721 		 *		+ offset));
2722 		 */
2723 		vlinfo++;
2724 		vfi++;
2725 	}
2726 
2727 	a0_datacounters(ppd, rsp, vl_select_mask);
2728 
2729 	if (resp_len)
2730 		*resp_len += response_data_size;
2731 
2732 	return reply((struct ib_mad_hdr *)pmp);
2733 }
2734 
2735 static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
2736 				       struct ib_device *ibdev, u8 port)
2737 {
2738 	struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
2739 						pmp->data;
2740 	struct _port_dctrs rsp;
2741 
2742 	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
2743 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2744 		goto bail;
2745 	}
2746 
2747 	memset(&rsp, 0, sizeof(rsp));
2748 	pma_get_opa_port_dctrs(ibdev, &rsp);
2749 
2750 	p->port_xmit_data = rsp.port_xmit_data;
2751 	p->port_rcv_data = rsp.port_rcv_data;
2752 	p->port_xmit_packets = rsp.port_xmit_pkts;
2753 	p->port_rcv_packets = rsp.port_rcv_pkts;
2754 	p->port_unicast_xmit_packets = 0;
2755 	p->port_unicast_rcv_packets =  0;
2756 	p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
2757 	p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
2758 
2759 bail:
2760 	return reply((struct ib_mad_hdr *)pmp);
2761 }
2762 
2763 static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
2764 				   struct _port_ectrs *rsp, u8 port)
2765 {
2766 	u64 tmp, tmp2;
2767 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2768 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2769 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2770 
2771 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2772 	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2773 					CNTR_INVALID_VL);
2774 	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
2775 		/* overflow/wrapped */
2776 		rsp->link_error_recovery = cpu_to_be32(~0);
2777 	} else {
2778 		rsp->link_error_recovery = cpu_to_be32(tmp2);
2779 	}
2780 
2781 	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2782 						CNTR_INVALID_VL));
2783 	rsp->port_rcv_errors =
2784 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2785 	rsp->port_rcv_remote_physical_errors =
2786 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2787 					  CNTR_INVALID_VL));
2788 	rsp->port_rcv_switch_relay_errors = 0;
2789 	rsp->port_xmit_discards =
2790 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2791 					   CNTR_INVALID_VL));
2792 	rsp->port_xmit_constraint_errors =
2793 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2794 					   CNTR_INVALID_VL));
2795 	rsp->port_rcv_constraint_errors =
2796 		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2797 					   CNTR_INVALID_VL));
2798 	tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
2799 	tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
2800 	if (tmp2 < tmp) {
2801 		/* overflow/wrapped */
2802 		rsp->local_link_integrity_errors = cpu_to_be64(~0);
2803 	} else {
2804 		rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
2805 	}
2806 	rsp->excessive_buffer_overruns =
2807 		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2808 }
2809 
2810 static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
2811 				  struct ib_device *ibdev,
2812 				  u8 port, u32 *resp_len)
2813 {
2814 	size_t response_data_size;
2815 	struct _port_ectrs *rsp;
2816 	u8 port_num;
2817 	struct opa_port_error_counters64_msg *req;
2818 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2819 	u32 num_ports;
2820 	u8 num_pslm;
2821 	u8 num_vls;
2822 	struct hfi1_ibport *ibp;
2823 	struct hfi1_pportdata *ppd;
2824 	struct _vls_ectrs *vlinfo;
2825 	unsigned long vl;
2826 	u64 port_mask, tmp;
2827 	u32 vl_select_mask;
2828 	int vfi;
2829 
2830 	req = (struct opa_port_error_counters64_msg *)pmp->data;
2831 
2832 	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2833 
2834 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2835 	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2836 
2837 	if (num_ports != 1 || num_ports != num_pslm) {
2838 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2839 		return reply((struct ib_mad_hdr *)pmp);
2840 	}
2841 
2842 	response_data_size = sizeof(struct opa_port_error_counters64_msg) +
2843 				num_vls * sizeof(struct _vls_ectrs);
2844 
2845 	if (response_data_size > sizeof(pmp->data)) {
2846 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2847 		return reply((struct ib_mad_hdr *)pmp);
2848 	}
2849 	/*
2850 	 * The bit set in the mask needs to be consistent with the
2851 	 * port the request came in on.
2852 	 */
2853 	port_mask = be64_to_cpu(req->port_select_mask[3]);
2854 	port_num = find_first_bit((unsigned long *)&port_mask,
2855 				  sizeof(port_mask));
2856 
2857 	if (port_num != port) {
2858 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2859 		return reply((struct ib_mad_hdr *)pmp);
2860 	}
2861 
2862 	rsp = &req->port[0];
2863 
2864 	ibp = to_iport(ibdev, port_num);
2865 	ppd = ppd_from_ibp(ibp);
2866 
2867 	memset(rsp, 0, sizeof(*rsp));
2868 	rsp->port_number = port_num;
2869 
2870 	pma_get_opa_port_ectrs(ibdev, rsp, port_num);
2871 
2872 	rsp->port_rcv_remote_physical_errors =
2873 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2874 					  CNTR_INVALID_VL));
2875 	rsp->fm_config_errors =
2876 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2877 					  CNTR_INVALID_VL));
2878 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2879 
2880 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2881 
2882 	vlinfo = &rsp->vls[0];
2883 	vfi = 0;
2884 	vl_select_mask = be32_to_cpu(req->vl_select_mask);
2885 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2886 			 8 * sizeof(req->vl_select_mask)) {
2887 		memset(vlinfo, 0, sizeof(*vlinfo));
2888 		/* vlinfo->vls[vfi].port_vl_xmit_discards ??? */
2889 		vlinfo += 1;
2890 		vfi++;
2891 	}
2892 
2893 	if (resp_len)
2894 		*resp_len += response_data_size;
2895 
2896 	return reply((struct ib_mad_hdr *)pmp);
2897 }
2898 
2899 static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
2900 				   struct ib_device *ibdev, u8 port)
2901 {
2902 	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
2903 		pmp->data;
2904 	struct _port_ectrs rsp;
2905 	u64 temp_link_overrun_errors;
2906 	u64 temp_64;
2907 	u32 temp_32;
2908 
2909 	memset(&rsp, 0, sizeof(rsp));
2910 	pma_get_opa_port_ectrs(ibdev, &rsp, port);
2911 
2912 	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
2913 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2914 		goto bail;
2915 	}
2916 
2917 	p->symbol_error_counter = 0; /* N/A for OPA */
2918 
2919 	temp_32 = be32_to_cpu(rsp.link_error_recovery);
2920 	if (temp_32 > 0xFFUL)
2921 		p->link_error_recovery_counter = 0xFF;
2922 	else
2923 		p->link_error_recovery_counter = (u8)temp_32;
2924 
2925 	temp_32 = be32_to_cpu(rsp.link_downed);
2926 	if (temp_32 > 0xFFUL)
2927 		p->link_downed_counter = 0xFF;
2928 	else
2929 		p->link_downed_counter = (u8)temp_32;
2930 
2931 	temp_64 = be64_to_cpu(rsp.port_rcv_errors);
2932 	if (temp_64 > 0xFFFFUL)
2933 		p->port_rcv_errors = cpu_to_be16(0xFFFF);
2934 	else
2935 		p->port_rcv_errors = cpu_to_be16((u16)temp_64);
2936 
2937 	temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
2938 	if (temp_64 > 0xFFFFUL)
2939 		p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
2940 	else
2941 		p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
2942 
2943 	temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
2944 	p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
2945 
2946 	temp_64 = be64_to_cpu(rsp.port_xmit_discards);
2947 	if (temp_64 > 0xFFFFUL)
2948 		p->port_xmit_discards = cpu_to_be16(0xFFFF);
2949 	else
2950 		p->port_xmit_discards = cpu_to_be16((u16)temp_64);
2951 
2952 	temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
2953 	if (temp_64 > 0xFFUL)
2954 		p->port_xmit_constraint_errors = 0xFF;
2955 	else
2956 		p->port_xmit_constraint_errors = (u8)temp_64;
2957 
2958 	temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
2959 	if (temp_64 > 0xFFUL)
2960 		p->port_rcv_constraint_errors = 0xFFUL;
2961 	else
2962 		p->port_rcv_constraint_errors = (u8)temp_64;
2963 
2964 	/* LocalLink: 7:4, BufferOverrun: 3:0 */
2965 	temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
2966 	if (temp_64 > 0xFUL)
2967 		temp_64 = 0xFUL;
2968 
2969 	temp_link_overrun_errors = temp_64 << 4;
2970 
2971 	temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
2972 	if (temp_64 > 0xFUL)
2973 		temp_64 = 0xFUL;
2974 	temp_link_overrun_errors |= temp_64;
2975 
2976 	p->link_overrun_errors = (u8)temp_link_overrun_errors;
2977 
2978 	p->vl15_dropped = 0; /* N/A for OPA */
2979 
2980 bail:
2981 	return reply((struct ib_mad_hdr *)pmp);
2982 }
2983 
2984 static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
2985 				 struct ib_device *ibdev,
2986 				 u8 port, u32 *resp_len)
2987 {
2988 	size_t response_data_size;
2989 	struct _port_ei *rsp;
2990 	struct opa_port_error_info_msg *req;
2991 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2992 	u64 port_mask;
2993 	u32 num_ports;
2994 	u8 port_num;
2995 	u8 num_pslm;
2996 	u64 reg;
2997 
2998 	req = (struct opa_port_error_info_msg *)pmp->data;
2999 	rsp = &req->port[0];
3000 
3001 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3002 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3003 
3004 	memset(rsp, 0, sizeof(*rsp));
3005 
3006 	if (num_ports != 1 || num_ports != num_pslm) {
3007 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3008 		return reply((struct ib_mad_hdr *)pmp);
3009 	}
3010 
3011 	/* Sanity check */
3012 	response_data_size = sizeof(struct opa_port_error_info_msg);
3013 
3014 	if (response_data_size > sizeof(pmp->data)) {
3015 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3016 		return reply((struct ib_mad_hdr *)pmp);
3017 	}
3018 
3019 	/*
3020 	 * The bit set in the mask needs to be consistent with the port
3021 	 * the request came in on.
3022 	 */
3023 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3024 	port_num = find_first_bit((unsigned long *)&port_mask,
3025 				  sizeof(port_mask));
3026 
3027 	if (port_num != port) {
3028 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3029 		return reply((struct ib_mad_hdr *)pmp);
3030 	}
3031 
3032 	/* PortRcvErrorInfo */
3033 	rsp->port_rcv_ei.status_and_code =
3034 		dd->err_info_rcvport.status_and_code;
3035 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
3036 	       &dd->err_info_rcvport.packet_flit1, sizeof(u64));
3037 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
3038 	       &dd->err_info_rcvport.packet_flit2, sizeof(u64));
3039 
3040 	/* ExcessiverBufferOverrunInfo */
3041 	reg = read_csr(dd, RCV_ERR_INFO);
3042 	if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
3043 		/*
3044 		 * if the RcvExcessBufferOverrun bit is set, save SC of
3045 		 * first pkt that encountered an excess buffer overrun
3046 		 */
3047 		u8 tmp = (u8)reg;
3048 
3049 		tmp &=  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
3050 		tmp <<= 2;
3051 		rsp->excessive_buffer_overrun_ei.status_and_sc = tmp;
3052 		/* set the status bit */
3053 		rsp->excessive_buffer_overrun_ei.status_and_sc |= 0x80;
3054 	}
3055 
3056 	rsp->port_xmit_constraint_ei.status =
3057 		dd->err_info_xmit_constraint.status;
3058 	rsp->port_xmit_constraint_ei.pkey =
3059 		cpu_to_be16(dd->err_info_xmit_constraint.pkey);
3060 	rsp->port_xmit_constraint_ei.slid =
3061 		cpu_to_be32(dd->err_info_xmit_constraint.slid);
3062 
3063 	rsp->port_rcv_constraint_ei.status =
3064 		dd->err_info_rcv_constraint.status;
3065 	rsp->port_rcv_constraint_ei.pkey =
3066 		cpu_to_be16(dd->err_info_rcv_constraint.pkey);
3067 	rsp->port_rcv_constraint_ei.slid =
3068 		cpu_to_be32(dd->err_info_rcv_constraint.slid);
3069 
3070 	/* UncorrectableErrorInfo */
3071 	rsp->uncorrectable_ei.status_and_code = dd->err_info_uncorrectable;
3072 
3073 	/* FMConfigErrorInfo */
3074 	rsp->fm_config_ei.status_and_code = dd->err_info_fmconfig;
3075 
3076 	if (resp_len)
3077 		*resp_len += response_data_size;
3078 
3079 	return reply((struct ib_mad_hdr *)pmp);
3080 }
3081 
3082 static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
3083 				  struct ib_device *ibdev,
3084 				  u8 port, u32 *resp_len)
3085 {
3086 	struct opa_clear_port_status *req =
3087 		(struct opa_clear_port_status *)pmp->data;
3088 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3089 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3090 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3091 	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
3092 	u64 portn = be64_to_cpu(req->port_select_mask[3]);
3093 	u32 counter_select = be32_to_cpu(req->counter_select_mask);
3094 	u32 vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
3095 	unsigned long vl;
3096 
3097 	if ((nports != 1) || (portn != 1 << port)) {
3098 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3099 		return reply((struct ib_mad_hdr *)pmp);
3100 	}
3101 	/*
3102 	 * only counters returned by pma_get_opa_portstatus() are
3103 	 * handled, so when pma_get_opa_portstatus() gets a fix,
3104 	 * the corresponding change should be made here as well.
3105 	 */
3106 
3107 	if (counter_select & CS_PORT_XMIT_DATA)
3108 		write_dev_cntr(dd, C_DC_XMIT_FLITS, CNTR_INVALID_VL, 0);
3109 
3110 	if (counter_select & CS_PORT_RCV_DATA)
3111 		write_dev_cntr(dd, C_DC_RCV_FLITS, CNTR_INVALID_VL, 0);
3112 
3113 	if (counter_select & CS_PORT_XMIT_PKTS)
3114 		write_dev_cntr(dd, C_DC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3115 
3116 	if (counter_select & CS_PORT_RCV_PKTS)
3117 		write_dev_cntr(dd, C_DC_RCV_PKTS, CNTR_INVALID_VL, 0);
3118 
3119 	if (counter_select & CS_PORT_MCAST_XMIT_PKTS)
3120 		write_dev_cntr(dd, C_DC_MC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3121 
3122 	if (counter_select & CS_PORT_MCAST_RCV_PKTS)
3123 		write_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL, 0);
3124 
3125 	if (counter_select & CS_PORT_XMIT_WAIT)
3126 		write_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL, 0);
3127 
3128 	/* ignore cs_sw_portCongestion for HFIs */
3129 
3130 	if (counter_select & CS_PORT_RCV_FECN)
3131 		write_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL, 0);
3132 
3133 	if (counter_select & CS_PORT_RCV_BECN)
3134 		write_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL, 0);
3135 
3136 	/* ignore cs_port_xmit_time_cong for HFIs */
3137 	/* ignore cs_port_xmit_wasted_bw for now */
3138 	/* ignore cs_port_xmit_wait_data for now */
3139 	if (counter_select & CS_PORT_RCV_BUBBLE)
3140 		write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
3141 
3142 	/* Only applicable for switch */
3143 	/* if (counter_select & CS_PORT_MARK_FECN)
3144 	 *	write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
3145 	 */
3146 
3147 	if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
3148 		write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
3149 
3150 	/* ignore cs_port_rcv_switch_relay_errors for HFIs */
3151 	if (counter_select & CS_PORT_XMIT_DISCARDS)
3152 		write_port_cntr(ppd, C_SW_XMIT_DSCD, CNTR_INVALID_VL, 0);
3153 
3154 	if (counter_select & CS_PORT_XMIT_CONSTRAINT_ERRORS)
3155 		write_port_cntr(ppd, C_SW_XMIT_CSTR_ERR, CNTR_INVALID_VL, 0);
3156 
3157 	if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS)
3158 		write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0);
3159 
3160 	if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS) {
3161 		write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0);
3162 		write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3163 	}
3164 
3165 	if (counter_select & CS_LINK_ERROR_RECOVERY) {
3166 		write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3167 		write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3168 			       CNTR_INVALID_VL, 0);
3169 	}
3170 
3171 	if (counter_select & CS_PORT_RCV_ERRORS)
3172 		write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3173 
3174 	if (counter_select & CS_EXCESSIVE_BUFFER_OVERRUNS) {
3175 		write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3176 		dd->rcv_ovfl_cnt = 0;
3177 	}
3178 
3179 	if (counter_select & CS_FM_CONFIG_ERRORS)
3180 		write_dev_cntr(dd, C_DC_FM_CFG_ERR, CNTR_INVALID_VL, 0);
3181 
3182 	if (counter_select & CS_LINK_DOWNED)
3183 		write_port_cntr(ppd, C_SW_LINK_DOWN, CNTR_INVALID_VL, 0);
3184 
3185 	if (counter_select & CS_UNCORRECTABLE_ERRORS)
3186 		write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
3187 
3188 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
3189 			 8 * sizeof(vl_select_mask)) {
3190 		if (counter_select & CS_PORT_XMIT_DATA)
3191 			write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
3192 
3193 		if (counter_select & CS_PORT_RCV_DATA)
3194 			write_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl), 0);
3195 
3196 		if (counter_select & CS_PORT_XMIT_PKTS)
3197 			write_port_cntr(ppd, C_TX_PKT_VL, idx_from_vl(vl), 0);
3198 
3199 		if (counter_select & CS_PORT_RCV_PKTS)
3200 			write_dev_cntr(dd, C_DC_RX_PKT_VL, idx_from_vl(vl), 0);
3201 
3202 		if (counter_select & CS_PORT_XMIT_WAIT)
3203 			write_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl), 0);
3204 
3205 		/* sw_port_vl_congestion is 0 for HFIs */
3206 		if (counter_select & CS_PORT_RCV_FECN)
3207 			write_dev_cntr(dd, C_DC_RCV_FCN_VL, idx_from_vl(vl), 0);
3208 
3209 		if (counter_select & CS_PORT_RCV_BECN)
3210 			write_dev_cntr(dd, C_DC_RCV_BCN_VL, idx_from_vl(vl), 0);
3211 
3212 		/* port_vl_xmit_time_cong is 0 for HFIs */
3213 		/* port_vl_xmit_wasted_bw ??? */
3214 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ??? */
3215 		if (counter_select & CS_PORT_RCV_BUBBLE)
3216 			write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
3217 
3218 		/* if (counter_select & CS_PORT_MARK_FECN)
3219 		 *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
3220 		 */
3221 		/* port_vl_xmit_discards ??? */
3222 	}
3223 
3224 	if (resp_len)
3225 		*resp_len += sizeof(*req);
3226 
3227 	return reply((struct ib_mad_hdr *)pmp);
3228 }
3229 
3230 static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
3231 				 struct ib_device *ibdev,
3232 				 u8 port, u32 *resp_len)
3233 {
3234 	struct _port_ei *rsp;
3235 	struct opa_port_error_info_msg *req;
3236 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3237 	u64 port_mask;
3238 	u32 num_ports;
3239 	u8 port_num;
3240 	u8 num_pslm;
3241 	u32 error_info_select;
3242 
3243 	req = (struct opa_port_error_info_msg *)pmp->data;
3244 	rsp = &req->port[0];
3245 
3246 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3247 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3248 
3249 	memset(rsp, 0, sizeof(*rsp));
3250 
3251 	if (num_ports != 1 || num_ports != num_pslm) {
3252 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3253 		return reply((struct ib_mad_hdr *)pmp);
3254 	}
3255 
3256 	/*
3257 	 * The bit set in the mask needs to be consistent with the port
3258 	 * the request came in on.
3259 	 */
3260 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3261 	port_num = find_first_bit((unsigned long *)&port_mask,
3262 				  sizeof(port_mask));
3263 
3264 	if (port_num != port) {
3265 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3266 		return reply((struct ib_mad_hdr *)pmp);
3267 	}
3268 
3269 	error_info_select = be32_to_cpu(req->error_info_select_mask);
3270 
3271 	/* PortRcvErrorInfo */
3272 	if (error_info_select & ES_PORT_RCV_ERROR_INFO)
3273 		/* turn off status bit */
3274 		dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3275 
3276 	/* ExcessiverBufferOverrunInfo */
3277 	if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
3278 		/*
3279 		 * status bit is essentially kept in the h/w - bit 5 of
3280 		 * RCV_ERR_INFO
3281 		 */
3282 		write_csr(dd, RCV_ERR_INFO,
3283 			  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
3284 
3285 	if (error_info_select & ES_PORT_XMIT_CONSTRAINT_ERROR_INFO)
3286 		dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3287 
3288 	if (error_info_select & ES_PORT_RCV_CONSTRAINT_ERROR_INFO)
3289 		dd->err_info_rcv_constraint.status &= ~OPA_EI_STATUS_SMASK;
3290 
3291 	/* UncorrectableErrorInfo */
3292 	if (error_info_select & ES_UNCORRECTABLE_ERROR_INFO)
3293 		/* turn off status bit */
3294 		dd->err_info_uncorrectable &= ~OPA_EI_STATUS_SMASK;
3295 
3296 	/* FMConfigErrorInfo */
3297 	if (error_info_select & ES_FM_CONFIG_ERROR_INFO)
3298 		/* turn off status bit */
3299 		dd->err_info_fmconfig &= ~OPA_EI_STATUS_SMASK;
3300 
3301 	if (resp_len)
3302 		*resp_len += sizeof(*req);
3303 
3304 	return reply((struct ib_mad_hdr *)pmp);
3305 }
3306 
/*
 * Layout of the congestion info attribute as filled in by
 * __subn_get_opa_cong_info().
 */
struct opa_congestion_info_attr {
	__be16 congestion_info;	/* reported as 0 by the Get handler */
	u8 control_table_cap;	/* Multiple of 64 entry unit CCTs */
	u8 congestion_log_length; /* reported as OPA_CONG_LOG_ELEMS */
} __packed;
3312 
3313 static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
3314 				    struct ib_device *ibdev, u8 port,
3315 				    u32 *resp_len)
3316 {
3317 	struct opa_congestion_info_attr *p =
3318 		(struct opa_congestion_info_attr *)data;
3319 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3320 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3321 
3322 	p->congestion_info = 0;
3323 	p->control_table_cap = ppd->cc_max_table_entries;
3324 	p->congestion_log_length = OPA_CONG_LOG_ELEMS;
3325 
3326 	if (resp_len)
3327 		*resp_len += sizeof(*p);
3328 
3329 	return reply((struct ib_mad_hdr *)smp);
3330 }
3331 
3332 static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
3333 				       u8 *data, struct ib_device *ibdev,
3334 				       u8 port, u32 *resp_len)
3335 {
3336 	int i;
3337 	struct opa_congestion_setting_attr *p =
3338 		(struct opa_congestion_setting_attr *)data;
3339 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3340 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3341 	struct opa_congestion_setting_entry_shadow *entries;
3342 	struct cc_state *cc_state;
3343 
3344 	rcu_read_lock();
3345 
3346 	cc_state = get_cc_state(ppd);
3347 
3348 	if (!cc_state) {
3349 		rcu_read_unlock();
3350 		return reply((struct ib_mad_hdr *)smp);
3351 	}
3352 
3353 	entries = cc_state->cong_setting.entries;
3354 	p->port_control = cpu_to_be16(cc_state->cong_setting.port_control);
3355 	p->control_map = cpu_to_be32(cc_state->cong_setting.control_map);
3356 	for (i = 0; i < OPA_MAX_SLS; i++) {
3357 		p->entries[i].ccti_increase = entries[i].ccti_increase;
3358 		p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
3359 		p->entries[i].trigger_threshold =
3360 			entries[i].trigger_threshold;
3361 		p->entries[i].ccti_min = entries[i].ccti_min;
3362 	}
3363 
3364 	rcu_read_unlock();
3365 
3366 	if (resp_len)
3367 		*resp_len += sizeof(*p);
3368 
3369 	return reply((struct ib_mad_hdr *)smp);
3370 }
3371 
3372 /*
3373  * Apply congestion control information stored in the ppd to the
3374  * active structure.
3375  */
3376 static void apply_cc_state(struct hfi1_pportdata *ppd)
3377 {
3378 	struct cc_state *old_cc_state, *new_cc_state;
3379 
3380 	new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
3381 	if (!new_cc_state)
3382 		return;
3383 
3384 	/*
3385 	 * Hold the lock for updating *and* to prevent ppd information
3386 	 * from changing during the update.
3387 	 */
3388 	spin_lock(&ppd->cc_state_lock);
3389 
3390 	old_cc_state = get_cc_state(ppd);
3391 	if (!old_cc_state) {
3392 		/* never active, or shutting down */
3393 		spin_unlock(&ppd->cc_state_lock);
3394 		kfree(new_cc_state);
3395 		return;
3396 	}
3397 
3398 	*new_cc_state = *old_cc_state;
3399 
3400 	new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
3401 	memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
3402 	       ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
3403 
3404 	new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
3405 	new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
3406 	memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
3407 	       OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
3408 
3409 	rcu_assign_pointer(ppd->cc_state, new_cc_state);
3410 
3411 	spin_unlock(&ppd->cc_state_lock);
3412 
3413 	call_rcu(&old_cc_state->rcu, cc_state_reclaim);
3414 }
3415 
3416 static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
3417 				       struct ib_device *ibdev, u8 port,
3418 				       u32 *resp_len)
3419 {
3420 	struct opa_congestion_setting_attr *p =
3421 		(struct opa_congestion_setting_attr *)data;
3422 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3423 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3424 	struct opa_congestion_setting_entry_shadow *entries;
3425 	int i;
3426 
3427 	/*
3428 	 * Save details from packet into the ppd.  Hold the cc_state_lock so
3429 	 * our information is consistent with anyone trying to apply the state.
3430 	 */
3431 	spin_lock(&ppd->cc_state_lock);
3432 	ppd->cc_sl_control_map = be32_to_cpu(p->control_map);
3433 
3434 	entries = ppd->congestion_entries;
3435 	for (i = 0; i < OPA_MAX_SLS; i++) {
3436 		entries[i].ccti_increase = p->entries[i].ccti_increase;
3437 		entries[i].ccti_timer = be16_to_cpu(p->entries[i].ccti_timer);
3438 		entries[i].trigger_threshold =
3439 			p->entries[i].trigger_threshold;
3440 		entries[i].ccti_min = p->entries[i].ccti_min;
3441 	}
3442 	spin_unlock(&ppd->cc_state_lock);
3443 
3444 	/* now apply the information */
3445 	apply_cc_state(ppd);
3446 
3447 	return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
3448 					   resp_len);
3449 }
3450 
3451 static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
3452 					u8 *data, struct ib_device *ibdev,
3453 					u8 port, u32 *resp_len)
3454 {
3455 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3456 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3457 	struct opa_hfi1_cong_log *cong_log = (struct opa_hfi1_cong_log *)data;
3458 	s64 ts;
3459 	int i;
3460 
3461 	if (am != 0) {
3462 		smp->status |= IB_SMP_INVALID_FIELD;
3463 		return reply((struct ib_mad_hdr *)smp);
3464 	}
3465 
3466 	spin_lock_irq(&ppd->cc_log_lock);
3467 
3468 	cong_log->log_type = OPA_CC_LOG_TYPE_HFI;
3469 	cong_log->congestion_flags = 0;
3470 	cong_log->threshold_event_counter =
3471 		cpu_to_be16(ppd->threshold_event_counter);
3472 	memcpy(cong_log->threshold_cong_event_map,
3473 	       ppd->threshold_cong_event_map,
3474 	       sizeof(cong_log->threshold_cong_event_map));
3475 	/* keep timestamp in units of 1.024 usec */
3476 	ts = ktime_to_ns(ktime_get()) / 1024;
3477 	cong_log->current_time_stamp = cpu_to_be32(ts);
3478 	for (i = 0; i < OPA_CONG_LOG_ELEMS; i++) {
3479 		struct opa_hfi1_cong_log_event_internal *cce =
3480 			&ppd->cc_events[ppd->cc_mad_idx++];
3481 		if (ppd->cc_mad_idx == OPA_CONG_LOG_ELEMS)
3482 			ppd->cc_mad_idx = 0;
3483 		/*
3484 		 * Entries which are older than twice the time
3485 		 * required to wrap the counter are supposed to
3486 		 * be zeroed (CA10-49 IBTA, release 1.2.1, V1).
3487 		 */
3488 		if ((u64)(ts - cce->timestamp) > (2 * UINT_MAX))
3489 			continue;
3490 		memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
3491 		memcpy(cong_log->events[i].remote_qp_number_cn_entry,
3492 		       &cce->rqpn, 3);
3493 		cong_log->events[i].sl_svc_type_cn_entry =
3494 			((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
3495 		cong_log->events[i].remote_lid_cn_entry =
3496 			cpu_to_be32(cce->rlid);
3497 		cong_log->events[i].timestamp_cn_entry =
3498 			cpu_to_be32(cce->timestamp);
3499 	}
3500 
3501 	/*
3502 	 * Reset threshold_cong_event_map, and threshold_event_counter
3503 	 * to 0 when log is read.
3504 	 */
3505 	memset(ppd->threshold_cong_event_map, 0x0,
3506 	       sizeof(ppd->threshold_cong_event_map));
3507 	ppd->threshold_event_counter = 0;
3508 
3509 	spin_unlock_irq(&ppd->cc_log_lock);
3510 
3511 	if (resp_len)
3512 		*resp_len += sizeof(struct opa_hfi1_cong_log);
3513 
3514 	return reply((struct ib_mad_hdr *)smp);
3515 }
3516 
3517 static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3518 				   struct ib_device *ibdev, u8 port,
3519 				   u32 *resp_len)
3520 {
3521 	struct ib_cc_table_attr *cc_table_attr =
3522 		(struct ib_cc_table_attr *)data;
3523 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3524 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3525 	u32 start_block = OPA_AM_START_BLK(am);
3526 	u32 n_blocks = OPA_AM_NBLK(am);
3527 	struct ib_cc_table_entry_shadow *entries;
3528 	int i, j;
3529 	u32 sentry, eentry;
3530 	struct cc_state *cc_state;
3531 
3532 	/* sanity check n_blocks, start_block */
3533 	if (n_blocks == 0 ||
3534 	    start_block + n_blocks > ppd->cc_max_table_entries) {
3535 		smp->status |= IB_SMP_INVALID_FIELD;
3536 		return reply((struct ib_mad_hdr *)smp);
3537 	}
3538 
3539 	rcu_read_lock();
3540 
3541 	cc_state = get_cc_state(ppd);
3542 
3543 	if (!cc_state) {
3544 		rcu_read_unlock();
3545 		return reply((struct ib_mad_hdr *)smp);
3546 	}
3547 
3548 	sentry = start_block * IB_CCT_ENTRIES;
3549 	eentry = sentry + (IB_CCT_ENTRIES * n_blocks);
3550 
3551 	cc_table_attr->ccti_limit = cpu_to_be16(cc_state->cct.ccti_limit);
3552 
3553 	entries = cc_state->cct.entries;
3554 
3555 	/* return n_blocks, though the last block may not be full */
3556 	for (j = 0, i = sentry; i < eentry; j++, i++)
3557 		cc_table_attr->ccti_entries[j].entry =
3558 			cpu_to_be16(entries[i].entry);
3559 
3560 	rcu_read_unlock();
3561 
3562 	if (resp_len)
3563 		*resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3564 
3565 	return reply((struct ib_mad_hdr *)smp);
3566 }
3567 
3568 void cc_state_reclaim(struct rcu_head *rcu)
3569 {
3570 	struct cc_state *cc_state = container_of(rcu, struct cc_state, rcu);
3571 
3572 	kfree(cc_state);
3573 }
3574 
3575 static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3576 				   struct ib_device *ibdev, u8 port,
3577 				   u32 *resp_len)
3578 {
3579 	struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
3580 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3581 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3582 	u32 start_block = OPA_AM_START_BLK(am);
3583 	u32 n_blocks = OPA_AM_NBLK(am);
3584 	struct ib_cc_table_entry_shadow *entries;
3585 	int i, j;
3586 	u32 sentry, eentry;
3587 	u16 ccti_limit;
3588 
3589 	/* sanity check n_blocks, start_block */
3590 	if (n_blocks == 0 ||
3591 	    start_block + n_blocks > ppd->cc_max_table_entries) {
3592 		smp->status |= IB_SMP_INVALID_FIELD;
3593 		return reply((struct ib_mad_hdr *)smp);
3594 	}
3595 
3596 	sentry = start_block * IB_CCT_ENTRIES;
3597 	eentry = sentry + ((n_blocks - 1) * IB_CCT_ENTRIES) +
3598 		 (be16_to_cpu(p->ccti_limit)) % IB_CCT_ENTRIES + 1;
3599 
3600 	/* sanity check ccti_limit */
3601 	ccti_limit = be16_to_cpu(p->ccti_limit);
3602 	if (ccti_limit + 1 > eentry) {
3603 		smp->status |= IB_SMP_INVALID_FIELD;
3604 		return reply((struct ib_mad_hdr *)smp);
3605 	}
3606 
3607 	/*
3608 	 * Save details from packet into the ppd.  Hold the cc_state_lock so
3609 	 * our information is consistent with anyone trying to apply the state.
3610 	 */
3611 	spin_lock(&ppd->cc_state_lock);
3612 	ppd->total_cct_entry = ccti_limit + 1;
3613 	entries = ppd->ccti_entries;
3614 	for (j = 0, i = sentry; i < eentry; j++, i++)
3615 		entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry);
3616 	spin_unlock(&ppd->cc_state_lock);
3617 
3618 	/* now apply the information */
3619 	apply_cc_state(ppd);
3620 
3621 	return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len);
3622 }
3623 
/*
 * Payload of the LED info attribute used by __subn_get_opa_led_info()
 * and __subn_set_opa_led_info().
 */
struct opa_led_info {
	__be32 rsvd_led_mask;	/* bit OPA_LED_SHIFT carries the LED state */
	__be32 rsvd;
};

/* Position and mask of the LED bit in rsvd_led_mask, in CPU byte order */
#define OPA_LED_SHIFT	31
#define OPA_LED_MASK	BIT(OPA_LED_SHIFT)
3631 
3632 static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3633 				   struct ib_device *ibdev, u8 port,
3634 				   u32 *resp_len)
3635 {
3636 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3637 	struct hfi1_pportdata *ppd = dd->pport;
3638 	struct opa_led_info *p = (struct opa_led_info *)data;
3639 	u32 nport = OPA_AM_NPORT(am);
3640 	u32 is_beaconing_active;
3641 
3642 	if (nport != 1) {
3643 		smp->status |= IB_SMP_INVALID_FIELD;
3644 		return reply((struct ib_mad_hdr *)smp);
3645 	}
3646 
3647 	/*
3648 	 * This pairs with the memory barrier in hfi1_start_led_override to
3649 	 * ensure that we read the correct state of LED beaconing represented
3650 	 * by led_override_timer_active
3651 	 */
3652 	smp_rmb();
3653 	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
3654 	p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
3655 
3656 	if (resp_len)
3657 		*resp_len += sizeof(struct opa_led_info);
3658 
3659 	return reply((struct ib_mad_hdr *)smp);
3660 }
3661 
3662 static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3663 				   struct ib_device *ibdev, u8 port,
3664 				   u32 *resp_len)
3665 {
3666 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3667 	struct opa_led_info *p = (struct opa_led_info *)data;
3668 	u32 nport = OPA_AM_NPORT(am);
3669 	int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
3670 
3671 	if (nport != 1) {
3672 		smp->status |= IB_SMP_INVALID_FIELD;
3673 		return reply((struct ib_mad_hdr *)smp);
3674 	}
3675 
3676 	if (on)
3677 		hfi1_start_led_override(dd->pport, 2000, 1500);
3678 	else
3679 		shutdown_led_override(dd->pport);
3680 
3681 	return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len);
3682 }
3683 
3684 static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
3685 			    u8 *data, struct ib_device *ibdev, u8 port,
3686 			    u32 *resp_len)
3687 {
3688 	int ret;
3689 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3690 
3691 	switch (attr_id) {
3692 	case IB_SMP_ATTR_NODE_DESC:
3693 		ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
3694 					      resp_len);
3695 		break;
3696 	case IB_SMP_ATTR_NODE_INFO:
3697 		ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
3698 					      resp_len);
3699 		break;
3700 	case IB_SMP_ATTR_PORT_INFO:
3701 		ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
3702 					      resp_len);
3703 		break;
3704 	case IB_SMP_ATTR_PKEY_TABLE:
3705 		ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
3706 					       resp_len);
3707 		break;
3708 	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
3709 		ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
3710 					      resp_len);
3711 		break;
3712 	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
3713 		ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
3714 					      resp_len);
3715 		break;
3716 	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
3717 		ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
3718 					       resp_len);
3719 		break;
3720 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
3721 		ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
3722 						resp_len);
3723 		break;
3724 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
3725 		ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
3726 					 resp_len);
3727 		break;
3728 	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
3729 		ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
3730 					 resp_len);
3731 		break;
3732 	case OPA_ATTRIB_ID_CABLE_INFO:
3733 		ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
3734 						resp_len);
3735 		break;
3736 	case IB_SMP_ATTR_VL_ARB_TABLE:
3737 		ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
3738 					    resp_len);
3739 		break;
3740 	case OPA_ATTRIB_ID_CONGESTION_INFO:
3741 		ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
3742 					       resp_len);
3743 		break;
3744 	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
3745 		ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
3746 						  port, resp_len);
3747 		break;
3748 	case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
3749 		ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
3750 						   port, resp_len);
3751 		break;
3752 	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
3753 		ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
3754 					      resp_len);
3755 		break;
3756 	case IB_SMP_ATTR_LED_INFO:
3757 		ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
3758 					      resp_len);
3759 		break;
3760 	case IB_SMP_ATTR_SM_INFO:
3761 		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
3762 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
3763 		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
3764 			return IB_MAD_RESULT_SUCCESS;
3765 		/* FALLTHROUGH */
3766 	default:
3767 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
3768 		ret = reply((struct ib_mad_hdr *)smp);
3769 		break;
3770 	}
3771 	return ret;
3772 }
3773 
3774 static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
3775 			    u8 *data, struct ib_device *ibdev, u8 port,
3776 			    u32 *resp_len)
3777 {
3778 	int ret;
3779 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3780 
3781 	switch (attr_id) {
3782 	case IB_SMP_ATTR_PORT_INFO:
3783 		ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
3784 					      resp_len);
3785 		break;
3786 	case IB_SMP_ATTR_PKEY_TABLE:
3787 		ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
3788 					       resp_len);
3789 		break;
3790 	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
3791 		ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
3792 					      resp_len);
3793 		break;
3794 	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
3795 		ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
3796 					      resp_len);
3797 		break;
3798 	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
3799 		ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
3800 					       resp_len);
3801 		break;
3802 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
3803 		ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
3804 						resp_len);
3805 		break;
3806 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
3807 		ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
3808 					 resp_len);
3809 		break;
3810 	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
3811 		ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
3812 					 resp_len);
3813 		break;
3814 	case IB_SMP_ATTR_VL_ARB_TABLE:
3815 		ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
3816 					    resp_len);
3817 		break;
3818 	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
3819 		ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
3820 						  port, resp_len);
3821 		break;
3822 	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
3823 		ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
3824 					      resp_len);
3825 		break;
3826 	case IB_SMP_ATTR_LED_INFO:
3827 		ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
3828 					      resp_len);
3829 		break;
3830 	case IB_SMP_ATTR_SM_INFO:
3831 		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
3832 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
3833 		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
3834 			return IB_MAD_RESULT_SUCCESS;
3835 		/* FALLTHROUGH */
3836 	default:
3837 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
3838 		ret = reply((struct ib_mad_hdr *)smp);
3839 		break;
3840 	}
3841 	return ret;
3842 }
3843 
3844 static inline void set_aggr_error(struct opa_aggregate *ag)
3845 {
3846 	ag->err_reqlength |= cpu_to_be16(0x8000);
3847 }
3848 
3849 static int subn_get_opa_aggregate(struct opa_smp *smp,
3850 				  struct ib_device *ibdev, u8 port,
3851 				  u32 *resp_len)
3852 {
3853 	int i;
3854 	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
3855 	u8 *next_smp = opa_get_smp_data(smp);
3856 
3857 	if (num_attr < 1 || num_attr > 117) {
3858 		smp->status |= IB_SMP_INVALID_FIELD;
3859 		return reply((struct ib_mad_hdr *)smp);
3860 	}
3861 
3862 	for (i = 0; i < num_attr; i++) {
3863 		struct opa_aggregate *agg;
3864 		size_t agg_data_len;
3865 		size_t agg_size;
3866 		u32 am;
3867 
3868 		agg = (struct opa_aggregate *)next_smp;
3869 		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
3870 		agg_size = sizeof(*agg) + agg_data_len;
3871 		am = be32_to_cpu(agg->attr_mod);
3872 
3873 		*resp_len += agg_size;
3874 
3875 		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
3876 			smp->status |= IB_SMP_INVALID_FIELD;
3877 			return reply((struct ib_mad_hdr *)smp);
3878 		}
3879 
3880 		/* zero the payload for this segment */
3881 		memset(next_smp + sizeof(*agg), 0, agg_data_len);
3882 
3883 		(void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
3884 					ibdev, port, NULL);
3885 		if (smp->status & ~IB_SMP_DIRECTION) {
3886 			set_aggr_error(agg);
3887 			return reply((struct ib_mad_hdr *)smp);
3888 		}
3889 		next_smp += agg_size;
3890 	}
3891 
3892 	return reply((struct ib_mad_hdr *)smp);
3893 }
3894 
3895 static int subn_set_opa_aggregate(struct opa_smp *smp,
3896 				  struct ib_device *ibdev, u8 port,
3897 				  u32 *resp_len)
3898 {
3899 	int i;
3900 	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
3901 	u8 *next_smp = opa_get_smp_data(smp);
3902 
3903 	if (num_attr < 1 || num_attr > 117) {
3904 		smp->status |= IB_SMP_INVALID_FIELD;
3905 		return reply((struct ib_mad_hdr *)smp);
3906 	}
3907 
3908 	for (i = 0; i < num_attr; i++) {
3909 		struct opa_aggregate *agg;
3910 		size_t agg_data_len;
3911 		size_t agg_size;
3912 		u32 am;
3913 
3914 		agg = (struct opa_aggregate *)next_smp;
3915 		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
3916 		agg_size = sizeof(*agg) + agg_data_len;
3917 		am = be32_to_cpu(agg->attr_mod);
3918 
3919 		*resp_len += agg_size;
3920 
3921 		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
3922 			smp->status |= IB_SMP_INVALID_FIELD;
3923 			return reply((struct ib_mad_hdr *)smp);
3924 		}
3925 
3926 		(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
3927 					ibdev, port, NULL);
3928 		if (smp->status & ~IB_SMP_DIRECTION) {
3929 			set_aggr_error(agg);
3930 			return reply((struct ib_mad_hdr *)smp);
3931 		}
3932 		next_smp += agg_size;
3933 	}
3934 
3935 	return reply((struct ib_mad_hdr *)smp);
3936 }
3937 
3938 /*
3939  * OPAv1 specifies that, on the transition to link up, these counters
3940  * are cleared:
3941  *   PortRcvErrors [*]
3942  *   LinkErrorRecovery
3943  *   LocalLinkIntegrityErrors
3944  *   ExcessiveBufferOverruns [*]
3945  *
3946  * [*] Error info associated with these counters is retained, but the
3947  * error info status is reset to 0.
3948  */
3949 void clear_linkup_counters(struct hfi1_devdata *dd)
3950 {
3951 	/* PortRcvErrors */
3952 	write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3953 	dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3954 	/* LinkErrorRecovery */
3955 	write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3956 	write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0);
3957 	/* LocalLinkIntegrityErrors */
3958 	write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0);
3959 	write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3960 	/* ExcessiveBufferOverruns */
3961 	write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3962 	dd->rcv_ovfl_cnt = 0;
3963 	dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3964 }
3965 
3966 /*
3967  * is_local_mad() returns 1 if 'mad' is sent from, and destined to the
3968  * local node, 0 otherwise.
3969  */
3970 static int is_local_mad(struct hfi1_ibport *ibp, const struct opa_mad *mad,
3971 			const struct ib_wc *in_wc)
3972 {
3973 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3974 	const struct opa_smp *smp = (const struct opa_smp *)mad;
3975 
3976 	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
3977 		return (smp->hop_cnt == 0 &&
3978 			smp->route.dr.dr_slid == OPA_LID_PERMISSIVE &&
3979 			smp->route.dr.dr_dlid == OPA_LID_PERMISSIVE);
3980 	}
3981 
3982 	return (in_wc->slid == ppd->lid);
3983 }
3984 
3985 /*
3986  * opa_local_smp_check() should only be called on MADs for which
3987  * is_local_mad() returns true. It applies the SMP checks that are
3988  * specific to SMPs which are sent from, and destined to this node.
3989  * opa_local_smp_check() returns 0 if the SMP passes its checks, 1
3990  * otherwise.
3991  *
3992  * SMPs which arrive from other nodes are instead checked by
3993  * opa_smp_check().
3994  */
3995 static int opa_local_smp_check(struct hfi1_ibport *ibp,
3996 			       const struct ib_wc *in_wc)
3997 {
3998 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3999 	u16 slid = in_wc->slid;
4000 	u16 pkey;
4001 
4002 	if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys))
4003 		return 1;
4004 
4005 	pkey = ppd->pkeys[in_wc->pkey_index];
4006 	/*
4007 	 * We need to do the "node-local" checks specified in OPAv1,
4008 	 * rev 0.90, section 9.10.26, which are:
4009 	 *   - pkey is 0x7fff, or 0xffff
4010 	 *   - Source QPN == 0 || Destination QPN == 0
4011 	 *   - the MAD header's management class is either
4012 	 *     IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE or
4013 	 *     IB_MGMT_CLASS_SUBN_LID_ROUTED
4014 	 *   - SLID != 0
4015 	 *
4016 	 * However, we know (and so don't need to check again) that,
4017 	 * for local SMPs, the MAD stack passes MADs with:
4018 	 *   - Source QPN of 0
4019 	 *   - MAD mgmt_class is IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4020 	 *   - SLID is either: OPA_LID_PERMISSIVE (0xFFFFFFFF), or
4021 	 *     our own port's lid
4022 	 *
4023 	 */
4024 	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
4025 		return 0;
4026 	ingress_pkey_table_fail(ppd, pkey, slid);
4027 	return 1;
4028 }
4029 
4030 static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
4031 			    u8 port, const struct opa_mad *in_mad,
4032 			    struct opa_mad *out_mad,
4033 			    u32 *resp_len)
4034 {
4035 	struct opa_smp *smp = (struct opa_smp *)out_mad;
4036 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4037 	u8 *data;
4038 	u32 am;
4039 	__be16 attr_id;
4040 	int ret;
4041 
4042 	*out_mad = *in_mad;
4043 	data = opa_get_smp_data(smp);
4044 
4045 	am = be32_to_cpu(smp->attr_mod);
4046 	attr_id = smp->attr_id;
4047 	if (smp->class_version != OPA_SMI_CLASS_VERSION) {
4048 		smp->status |= IB_SMP_UNSUP_VERSION;
4049 		ret = reply((struct ib_mad_hdr *)smp);
4050 		return ret;
4051 	}
4052 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
4053 			 smp->route.dr.dr_slid, smp->route.dr.return_path,
4054 			 smp->hop_cnt);
4055 	if (ret) {
4056 		u32 port_num = be32_to_cpu(smp->attr_mod);
4057 
4058 		/*
4059 		 * If this is a get/set portinfo, we already check the
4060 		 * M_Key if the MAD is for another port and the M_Key
4061 		 * is OK on the receiving port. This check is needed
4062 		 * to increment the error counters when the M_Key
4063 		 * fails to match on *both* ports.
4064 		 */
4065 		if (attr_id == IB_SMP_ATTR_PORT_INFO &&
4066 		    (smp->method == IB_MGMT_METHOD_GET ||
4067 		     smp->method == IB_MGMT_METHOD_SET) &&
4068 		    port_num && port_num <= ibdev->phys_port_cnt &&
4069 		    port != port_num)
4070 			(void)check_mkey(to_iport(ibdev, port_num),
4071 					  (struct ib_mad_hdr *)smp, 0,
4072 					  smp->mkey, smp->route.dr.dr_slid,
4073 					  smp->route.dr.return_path,
4074 					  smp->hop_cnt);
4075 		ret = IB_MAD_RESULT_FAILURE;
4076 		return ret;
4077 	}
4078 
4079 	*resp_len = opa_get_smp_header_size(smp);
4080 
4081 	switch (smp->method) {
4082 	case IB_MGMT_METHOD_GET:
4083 		switch (attr_id) {
4084 		default:
4085 			clear_opa_smp_data(smp);
4086 			ret = subn_get_opa_sma(attr_id, smp, am, data,
4087 					       ibdev, port, resp_len);
4088 			break;
4089 		case OPA_ATTRIB_ID_AGGREGATE:
4090 			ret = subn_get_opa_aggregate(smp, ibdev, port,
4091 						     resp_len);
4092 			break;
4093 		}
4094 		break;
4095 	case IB_MGMT_METHOD_SET:
4096 		switch (attr_id) {
4097 		default:
4098 			ret = subn_set_opa_sma(attr_id, smp, am, data,
4099 					       ibdev, port, resp_len);
4100 			break;
4101 		case OPA_ATTRIB_ID_AGGREGATE:
4102 			ret = subn_set_opa_aggregate(smp, ibdev, port,
4103 						     resp_len);
4104 			break;
4105 		}
4106 		break;
4107 	case IB_MGMT_METHOD_TRAP:
4108 	case IB_MGMT_METHOD_REPORT:
4109 	case IB_MGMT_METHOD_REPORT_RESP:
4110 	case IB_MGMT_METHOD_GET_RESP:
4111 		/*
4112 		 * The ib_mad module will call us to process responses
4113 		 * before checking for other consumers.
4114 		 * Just tell the caller to process it normally.
4115 		 */
4116 		ret = IB_MAD_RESULT_SUCCESS;
4117 		break;
4118 	default:
4119 		smp->status |= IB_SMP_UNSUP_METHOD;
4120 		ret = reply((struct ib_mad_hdr *)smp);
4121 		break;
4122 	}
4123 
4124 	return ret;
4125 }
4126 
4127 static int process_subn(struct ib_device *ibdev, int mad_flags,
4128 			u8 port, const struct ib_mad *in_mad,
4129 			struct ib_mad *out_mad)
4130 {
4131 	struct ib_smp *smp = (struct ib_smp *)out_mad;
4132 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4133 	int ret;
4134 
4135 	*out_mad = *in_mad;
4136 	if (smp->class_version != 1) {
4137 		smp->status |= IB_SMP_UNSUP_VERSION;
4138 		ret = reply((struct ib_mad_hdr *)smp);
4139 		return ret;
4140 	}
4141 
4142 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
4143 			 smp->mkey, (__force __be32)smp->dr_slid,
4144 			 smp->return_path, smp->hop_cnt);
4145 	if (ret) {
4146 		u32 port_num = be32_to_cpu(smp->attr_mod);
4147 
4148 		/*
4149 		 * If this is a get/set portinfo, we already check the
4150 		 * M_Key if the MAD is for another port and the M_Key
4151 		 * is OK on the receiving port. This check is needed
4152 		 * to increment the error counters when the M_Key
4153 		 * fails to match on *both* ports.
4154 		 */
4155 		if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
4156 		    (smp->method == IB_MGMT_METHOD_GET ||
4157 		     smp->method == IB_MGMT_METHOD_SET) &&
4158 		    port_num && port_num <= ibdev->phys_port_cnt &&
4159 		    port != port_num)
4160 			(void)check_mkey(to_iport(ibdev, port_num),
4161 					 (struct ib_mad_hdr *)smp, 0,
4162 					 smp->mkey,
4163 					 (__force __be32)smp->dr_slid,
4164 					 smp->return_path, smp->hop_cnt);
4165 		ret = IB_MAD_RESULT_FAILURE;
4166 		return ret;
4167 	}
4168 
4169 	switch (smp->method) {
4170 	case IB_MGMT_METHOD_GET:
4171 		switch (smp->attr_id) {
4172 		case IB_SMP_ATTR_NODE_INFO:
4173 			ret = subn_get_nodeinfo(smp, ibdev, port);
4174 			break;
4175 		default:
4176 			smp->status |= IB_SMP_UNSUP_METH_ATTR;
4177 			ret = reply((struct ib_mad_hdr *)smp);
4178 			break;
4179 		}
4180 		break;
4181 	}
4182 
4183 	return ret;
4184 }
4185 
4186 static int process_perf(struct ib_device *ibdev, u8 port,
4187 			const struct ib_mad *in_mad,
4188 			struct ib_mad *out_mad)
4189 {
4190 	struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
4191 	struct ib_class_port_info *cpi = (struct ib_class_port_info *)
4192 						&pmp->data;
4193 	int ret = IB_MAD_RESULT_FAILURE;
4194 
4195 	*out_mad = *in_mad;
4196 	if (pmp->mad_hdr.class_version != 1) {
4197 		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4198 		ret = reply((struct ib_mad_hdr *)pmp);
4199 		return ret;
4200 	}
4201 
4202 	switch (pmp->mad_hdr.method) {
4203 	case IB_MGMT_METHOD_GET:
4204 		switch (pmp->mad_hdr.attr_id) {
4205 		case IB_PMA_PORT_COUNTERS:
4206 			ret = pma_get_ib_portcounters(pmp, ibdev, port);
4207 			break;
4208 		case IB_PMA_PORT_COUNTERS_EXT:
4209 			ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
4210 			break;
4211 		case IB_PMA_CLASS_PORT_INFO:
4212 			cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
4213 			ret = reply((struct ib_mad_hdr *)pmp);
4214 			break;
4215 		default:
4216 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4217 			ret = reply((struct ib_mad_hdr *)pmp);
4218 			break;
4219 		}
4220 		break;
4221 
4222 	case IB_MGMT_METHOD_SET:
4223 		if (pmp->mad_hdr.attr_id) {
4224 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4225 			ret = reply((struct ib_mad_hdr *)pmp);
4226 		}
4227 		break;
4228 
4229 	case IB_MGMT_METHOD_TRAP:
4230 	case IB_MGMT_METHOD_GET_RESP:
4231 		/*
4232 		 * The ib_mad module will call us to process responses
4233 		 * before checking for other consumers.
4234 		 * Just tell the caller to process it normally.
4235 		 */
4236 		ret = IB_MAD_RESULT_SUCCESS;
4237 		break;
4238 
4239 	default:
4240 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4241 		ret = reply((struct ib_mad_hdr *)pmp);
4242 		break;
4243 	}
4244 
4245 	return ret;
4246 }
4247 
4248 static int process_perf_opa(struct ib_device *ibdev, u8 port,
4249 			    const struct opa_mad *in_mad,
4250 			    struct opa_mad *out_mad, u32 *resp_len)
4251 {
4252 	struct opa_pma_mad *pmp = (struct opa_pma_mad *)out_mad;
4253 	int ret;
4254 
4255 	*out_mad = *in_mad;
4256 
4257 	if (pmp->mad_hdr.class_version != OPA_SMI_CLASS_VERSION) {
4258 		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4259 		return reply((struct ib_mad_hdr *)pmp);
4260 	}
4261 
4262 	*resp_len = sizeof(pmp->mad_hdr);
4263 
4264 	switch (pmp->mad_hdr.method) {
4265 	case IB_MGMT_METHOD_GET:
4266 		switch (pmp->mad_hdr.attr_id) {
4267 		case IB_PMA_CLASS_PORT_INFO:
4268 			ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
4269 			break;
4270 		case OPA_PM_ATTRIB_ID_PORT_STATUS:
4271 			ret = pma_get_opa_portstatus(pmp, ibdev, port,
4272 						     resp_len);
4273 			break;
4274 		case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
4275 			ret = pma_get_opa_datacounters(pmp, ibdev, port,
4276 						       resp_len);
4277 			break;
4278 		case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
4279 			ret = pma_get_opa_porterrors(pmp, ibdev, port,
4280 						     resp_len);
4281 			break;
4282 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
4283 			ret = pma_get_opa_errorinfo(pmp, ibdev, port,
4284 						    resp_len);
4285 			break;
4286 		default:
4287 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4288 			ret = reply((struct ib_mad_hdr *)pmp);
4289 			break;
4290 		}
4291 		break;
4292 
4293 	case IB_MGMT_METHOD_SET:
4294 		switch (pmp->mad_hdr.attr_id) {
4295 		case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
4296 			ret = pma_set_opa_portstatus(pmp, ibdev, port,
4297 						     resp_len);
4298 			break;
4299 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
4300 			ret = pma_set_opa_errorinfo(pmp, ibdev, port,
4301 						    resp_len);
4302 			break;
4303 		default:
4304 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4305 			ret = reply((struct ib_mad_hdr *)pmp);
4306 			break;
4307 		}
4308 		break;
4309 
4310 	case IB_MGMT_METHOD_TRAP:
4311 	case IB_MGMT_METHOD_GET_RESP:
4312 		/*
4313 		 * The ib_mad module will call us to process responses
4314 		 * before checking for other consumers.
4315 		 * Just tell the caller to process it normally.
4316 		 */
4317 		ret = IB_MAD_RESULT_SUCCESS;
4318 		break;
4319 
4320 	default:
4321 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4322 		ret = reply((struct ib_mad_hdr *)pmp);
4323 		break;
4324 	}
4325 
4326 	return ret;
4327 }
4328 
4329 static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
4330 				u8 port, const struct ib_wc *in_wc,
4331 				const struct ib_grh *in_grh,
4332 				const struct opa_mad *in_mad,
4333 				struct opa_mad *out_mad, size_t *out_mad_size,
4334 				u16 *out_mad_pkey_index)
4335 {
4336 	int ret;
4337 	int pkey_idx;
4338 	u32 resp_len = 0;
4339 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4340 
4341 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
4342 	if (pkey_idx < 0) {
4343 		pr_warn("failed to find limited mgmt pkey, defaulting 0x%x\n",
4344 			hfi1_get_pkey(ibp, 1));
4345 		pkey_idx = 1;
4346 	}
4347 	*out_mad_pkey_index = (u16)pkey_idx;
4348 
4349 	switch (in_mad->mad_hdr.mgmt_class) {
4350 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4351 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4352 		if (is_local_mad(ibp, in_mad, in_wc)) {
4353 			ret = opa_local_smp_check(ibp, in_wc);
4354 			if (ret)
4355 				return IB_MAD_RESULT_FAILURE;
4356 		}
4357 		ret = process_subn_opa(ibdev, mad_flags, port, in_mad,
4358 				       out_mad, &resp_len);
4359 		goto bail;
4360 	case IB_MGMT_CLASS_PERF_MGMT:
4361 		ret = process_perf_opa(ibdev, port, in_mad, out_mad,
4362 				       &resp_len);
4363 		goto bail;
4364 
4365 	default:
4366 		ret = IB_MAD_RESULT_SUCCESS;
4367 	}
4368 
4369 bail:
4370 	if (ret & IB_MAD_RESULT_REPLY)
4371 		*out_mad_size = round_up(resp_len, 8);
4372 	else if (ret & IB_MAD_RESULT_SUCCESS)
4373 		*out_mad_size = in_wc->byte_len - sizeof(struct ib_grh);
4374 
4375 	return ret;
4376 }
4377 
4378 static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4379 			       const struct ib_wc *in_wc,
4380 			       const struct ib_grh *in_grh,
4381 			       const struct ib_mad *in_mad,
4382 			       struct ib_mad *out_mad)
4383 {
4384 	int ret;
4385 
4386 	switch (in_mad->mad_hdr.mgmt_class) {
4387 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4388 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4389 		ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
4390 		break;
4391 	case IB_MGMT_CLASS_PERF_MGMT:
4392 		ret = process_perf(ibdev, port, in_mad, out_mad);
4393 		break;
4394 	default:
4395 		ret = IB_MAD_RESULT_SUCCESS;
4396 		break;
4397 	}
4398 
4399 	return ret;
4400 }
4401 
4402 /**
4403  * hfi1_process_mad - process an incoming MAD packet
4404  * @ibdev: the infiniband device this packet came in on
4405  * @mad_flags: MAD flags
4406  * @port: the port number this packet came in on
4407  * @in_wc: the work completion entry for this packet
4408  * @in_grh: the global route header for this packet
4409  * @in_mad: the incoming MAD
4410  * @out_mad: any outgoing MAD reply
4411  *
4412  * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
4413  * interested in processing.
4414  *
4415  * Note that the verbs framework has already done the MAD sanity checks,
4416  * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4417  * MADs.
4418  *
4419  * This is called by the ib_mad module.
4420  */
4421 int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4422 		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
4423 		     const struct ib_mad_hdr *in_mad, size_t in_mad_size,
4424 		     struct ib_mad_hdr *out_mad, size_t *out_mad_size,
4425 		     u16 *out_mad_pkey_index)
4426 {
4427 	switch (in_mad->base_version) {
4428 	case OPA_MGMT_BASE_VERSION:
4429 		if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
4430 			dev_err(ibdev->dma_device, "invalid in_mad_size\n");
4431 			return IB_MAD_RESULT_FAILURE;
4432 		}
4433 		return hfi1_process_opa_mad(ibdev, mad_flags, port,
4434 					    in_wc, in_grh,
4435 					    (struct opa_mad *)in_mad,
4436 					    (struct opa_mad *)out_mad,
4437 					    out_mad_size,
4438 					    out_mad_pkey_index);
4439 	case IB_MGMT_BASE_VERSION:
4440 		return hfi1_process_ib_mad(ibdev, mad_flags, port,
4441 					  in_wc, in_grh,
4442 					  (const struct ib_mad *)in_mad,
4443 					  (struct ib_mad *)out_mad);
4444 	default:
4445 		break;
4446 	}
4447 
4448 	return IB_MAD_RESULT_FAILURE;
4449 }
4450