xref: /openbmc/linux/drivers/infiniband/hw/hfi1/mad.c (revision e5c86679)
1 /*
2  * Copyright(c) 2015, 2016 Intel Corporation.
3  *
4  * This file is provided under a dual BSD/GPLv2 license.  When using or
5  * redistributing this file, you may do so under either license.
6  *
7  * GPL LICENSE SUMMARY
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of version 2 of the GNU General Public License as
11  * published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * BSD LICENSE
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  *
24  *  - Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  *  - Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in
28  *    the documentation and/or other materials provided with the
29  *    distribution.
30  *  - Neither the name of Intel Corporation nor the names of its
31  *    contributors may be used to endorse or promote products derived
32  *    from this software without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  *
46  */
47 
48 #include <linux/net.h>
49 #define OPA_NUM_PKEY_BLOCKS_PER_SMP (OPA_SMP_DR_DATA_SIZE \
50 			/ (OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16)))
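/*
 * For reference (an assumption, not taken from this file): with the
 * opa_smi.h sizes at this revision, OPA_SMP_DR_DATA_SIZE = 1872 bytes and
 * OPA_PARTITION_TABLE_BLK_SIZE = 32 two-byte entries, so the macro above
 * works out to 1872 / 64 = 29 pkey blocks per directed-route SMP.
 */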
51 
52 #include "hfi.h"
53 #include "mad.h"
54 #include "trace.h"
55 #include "qp.h"
56 
57 /* the reset value from the FM is supposed to be 0xffff, handle both */
58 #define OPA_LINK_WIDTH_RESET_OLD 0x0fff
59 #define OPA_LINK_WIDTH_RESET 0xffff
60 
61 static int reply(struct ib_mad_hdr *smp)
62 {
63 	/*
64 	 * The verbs framework will handle the directed/LID route
65 	 * packet changes.
66 	 */
67 	smp->method = IB_MGMT_METHOD_GET_RESP;
68 	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
69 		smp->status |= IB_SMP_DIRECTION;
70 	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
71 }
72 
73 static inline void clear_opa_smp_data(struct opa_smp *smp)
74 {
75 	void *data = opa_get_smp_data(smp);
76 	size_t size = opa_get_smp_data_size(smp);
77 
78 	memset(data, 0, size);
79 }
80 
81 void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port)
82 {
83 	struct ib_event event;
84 
85 	event.event = IB_EVENT_PKEY_CHANGE;
86 	event.device = &dd->verbs_dev.rdi.ibdev;
87 	event.element.port_num = port;
88 	ib_dispatch_event(&event);
89 }
90 
91 static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
92 {
93 	struct ib_mad_send_buf *send_buf;
94 	struct ib_mad_agent *agent;
95 	struct opa_smp *smp;
96 	int ret;
97 	unsigned long flags;
98 	unsigned long timeout;
99 	int pkey_idx;
100 	u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;
101 
102 	agent = ibp->rvp.send_agent;
103 	if (!agent)
104 		return;
105 
106 	/* o14-3.2.1 */
107 	if (ppd_from_ibp(ibp)->lstate != IB_PORT_ACTIVE)
108 		return;
109 
110 	/* o14-2 */
111 	if (ibp->rvp.trap_timeout && time_before(jiffies,
112 						 ibp->rvp.trap_timeout))
113 		return;
114 
115 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
116 	if (pkey_idx < 0) {
117 		pr_warn("%s: failed to find limited mgmt pkey, defaulting 0x%x\n",
118 			__func__, hfi1_get_pkey(ibp, 1));
119 		pkey_idx = 1;
120 	}
121 
122 	send_buf = ib_create_send_mad(agent, qpn, pkey_idx, 0,
123 				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
124 				      GFP_ATOMIC, IB_MGMT_BASE_VERSION);
125 	if (IS_ERR(send_buf))
126 		return;
127 
128 	smp = send_buf->mad;
129 	smp->base_version = OPA_MGMT_BASE_VERSION;
130 	smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
131 	smp->class_version = OPA_SM_CLASS_VERSION;
132 	smp->method = IB_MGMT_METHOD_TRAP;
133 	ibp->rvp.tid++;
134 	smp->tid = cpu_to_be64(ibp->rvp.tid);
135 	smp->attr_id = IB_SMP_ATTR_NOTICE;
136 	/* o14-1: smp->mkey = 0; */
137 	memcpy(smp->route.lid.data, data, len);
138 
139 	spin_lock_irqsave(&ibp->rvp.lock, flags);
140 	if (!ibp->rvp.sm_ah) {
141 		if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
142 			struct ib_ah *ah;
143 
144 			ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
145 			if (IS_ERR(ah)) {
146 				ret = PTR_ERR(ah);
147 			} else {
148 				send_buf->ah = ah;
149 				ibp->rvp.sm_ah = ibah_to_rvtah(ah);
150 				ret = 0;
151 			}
152 		} else {
153 			ret = -EINVAL;
154 		}
155 	} else {
156 		send_buf->ah = &ibp->rvp.sm_ah->ibah;
157 		ret = 0;
158 	}
159 	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
160 
161 	if (!ret)
162 		ret = ib_post_send_mad(send_buf, NULL);
163 	if (!ret) {
164 		/* 4.096 usec. */
165 		timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
166 		ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
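		/*
		 * Example: with subnet_timeout == 18 the interval above is
		 * 4096 * 2^18 / 1000 usec, i.e. roughly 1.07 seconds
		 * between repeated traps.
		 */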
167 	} else {
168 		ib_free_send_mad(send_buf);
169 		ibp->rvp.trap_timeout = 0;
170 	}
171 }
172 
173 /*
174  * Send a bad [PQ]_Key trap (ch. 14.3.8).
175  */
176 void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
177 		    u32 qp1, u32 qp2, u16 lid1, u16 lid2)
178 {
179 	struct opa_mad_notice_attr data;
180 	u32 lid = ppd_from_ibp(ibp)->lid;
181 	u32 _lid1 = lid1;
182 	u32 _lid2 = lid2;
183 
184 	memset(&data, 0, sizeof(data));
185 
186 	if (trap_num == OPA_TRAP_BAD_P_KEY)
187 		ibp->rvp.pkey_violations++;
188 	else
189 		ibp->rvp.qkey_violations++;
190 	ibp->rvp.n_pkt_drops++;
191 
192 	/* Send violation trap */
193 	data.generic_type = IB_NOTICE_TYPE_SECURITY;
194 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
195 	data.trap_num = trap_num;
196 	data.issuer_lid = cpu_to_be32(lid);
197 	data.ntc_257_258.lid1 = cpu_to_be32(_lid1);
198 	data.ntc_257_258.lid2 = cpu_to_be32(_lid2);
199 	data.ntc_257_258.key = cpu_to_be32(key);
200 	data.ntc_257_258.sl = sl << 3;
201 	data.ntc_257_258.qp1 = cpu_to_be32(qp1);
202 	data.ntc_257_258.qp2 = cpu_to_be32(qp2);
203 
204 	send_trap(ibp, &data, sizeof(data));
205 }
206 
207 /*
208  * Send a bad M_Key trap (ch. 14.3.9).
209  */
210 static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
211 		     __be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt)
212 {
213 	struct opa_mad_notice_attr data;
214 	u32 lid = ppd_from_ibp(ibp)->lid;
215 
216 	memset(&data, 0, sizeof(data));
217 	/* Send violation trap */
218 	data.generic_type = IB_NOTICE_TYPE_SECURITY;
219 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
220 	data.trap_num = OPA_TRAP_BAD_M_KEY;
221 	data.issuer_lid = cpu_to_be32(lid);
222 	data.ntc_256.lid = data.issuer_lid;
223 	data.ntc_256.method = mad->method;
224 	data.ntc_256.attr_id = mad->attr_id;
225 	data.ntc_256.attr_mod = mad->attr_mod;
226 	data.ntc_256.mkey = mkey;
227 	if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
228 		data.ntc_256.dr_slid = dr_slid;
229 		data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
230 		if (hop_cnt > ARRAY_SIZE(data.ntc_256.dr_rtn_path)) {
231 			data.ntc_256.dr_trunc_hop |=
232 				IB_NOTICE_TRAP_DR_TRUNC;
233 			hop_cnt = ARRAY_SIZE(data.ntc_256.dr_rtn_path);
234 		}
235 		data.ntc_256.dr_trunc_hop |= hop_cnt;
236 		memcpy(data.ntc_256.dr_rtn_path, return_path,
237 		       hop_cnt);
238 	}
239 
240 	send_trap(ibp, &data, sizeof(data));
241 }
242 
243 /*
244  * Send a Port Capability Mask Changed trap (ch. 14.3.11).
245  */
246 void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
247 {
248 	struct opa_mad_notice_attr data;
249 	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
250 	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
251 	struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
252 	u32 lid = ppd_from_ibp(ibp)->lid;
253 
254 	memset(&data, 0, sizeof(data));
255 
256 	data.generic_type = IB_NOTICE_TYPE_INFO;
257 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
258 	data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
259 	data.issuer_lid = cpu_to_be32(lid);
260 	data.ntc_144.lid = data.issuer_lid;
261 	data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
262 
263 	send_trap(ibp, &data, sizeof(data));
264 }
265 
266 /*
267  * Send a System Image GUID Changed trap (ch. 14.3.12).
268  */
269 void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
270 {
271 	struct opa_mad_notice_attr data;
272 	u32 lid = ppd_from_ibp(ibp)->lid;
273 
274 	memset(&data, 0, sizeof(data));
275 
276 	data.generic_type = IB_NOTICE_TYPE_INFO;
277 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
278 	data.trap_num = OPA_TRAP_CHANGE_SYSGUID;
279 	data.issuer_lid = cpu_to_be32(lid);
280 	data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
281 	data.ntc_145.lid = data.issuer_lid;
282 
283 	send_trap(ibp, &data, sizeof(data));
284 }
285 
286 /*
287  * Send a Node Description Changed trap (ch. 14.3.13).
288  */
289 void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
290 {
291 	struct opa_mad_notice_attr data;
292 	u32 lid = ppd_from_ibp(ibp)->lid;
293 
294 	memset(&data, 0, sizeof(data));
295 
296 	data.generic_type = IB_NOTICE_TYPE_INFO;
297 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
298 	data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
299 	data.issuer_lid = cpu_to_be32(lid);
300 	data.ntc_144.lid = data.issuer_lid;
301 	data.ntc_144.change_flags =
302 		cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG);
303 
304 	send_trap(ibp, &data, sizeof(data));
305 }
306 
307 static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
308 				   u8 *data, struct ib_device *ibdev,
309 				   u8 port, u32 *resp_len)
310 {
311 	struct opa_node_description *nd;
312 
313 	if (am) {
314 		smp->status |= IB_SMP_INVALID_FIELD;
315 		return reply((struct ib_mad_hdr *)smp);
316 	}
317 
318 	nd = (struct opa_node_description *)data;
319 
320 	memcpy(nd->data, ibdev->node_desc, sizeof(nd->data));
321 
322 	if (resp_len)
323 		*resp_len += sizeof(*nd);
324 
325 	return reply((struct ib_mad_hdr *)smp);
326 }
327 
328 static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
329 				   struct ib_device *ibdev, u8 port,
330 				   u32 *resp_len)
331 {
332 	struct opa_node_info *ni;
333 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
334 	unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */
335 
336 	ni = (struct opa_node_info *)data;
337 
338 	/* GUID 0 is illegal */
339 	if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
340 	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
341 		smp->status |= IB_SMP_INVALID_FIELD;
342 		return reply((struct ib_mad_hdr *)smp);
343 	}
344 
345 	ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
346 	ni->base_version = OPA_MGMT_BASE_VERSION;
347 	ni->class_version = OPA_SM_CLASS_VERSION;
348 	ni->node_type = 1;     /* channel adapter */
349 	ni->num_ports = ibdev->phys_port_cnt;
350 	/* This is already in network order */
351 	ni->system_image_guid = ib_hfi1_sys_image_guid;
352 	ni->node_guid = ibdev->node_guid;
353 	ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
354 	ni->device_id = cpu_to_be16(dd->pcidev->device);
355 	ni->revision = cpu_to_be32(dd->minrev);
356 	ni->local_port_num = port;
357 	ni->vendor_id[0] = dd->oui1;
358 	ni->vendor_id[1] = dd->oui2;
359 	ni->vendor_id[2] = dd->oui3;
360 
361 	if (resp_len)
362 		*resp_len += sizeof(*ni);
363 
364 	return reply((struct ib_mad_hdr *)smp);
365 }
366 
367 static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
368 			     u8 port)
369 {
370 	struct ib_node_info *nip = (struct ib_node_info *)&smp->data;
371 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
372 	unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */
373 
374 	/* GUID 0 is illegal */
375 	if (smp->attr_mod || pidx >= dd->num_pports ||
376 	    ibdev->node_guid == 0 ||
377 	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
378 		smp->status |= IB_SMP_INVALID_FIELD;
379 		return reply((struct ib_mad_hdr *)smp);
380 	}
381 
382 	nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
383 	nip->base_version = OPA_MGMT_BASE_VERSION;
384 	nip->class_version = OPA_SM_CLASS_VERSION;
385 	nip->node_type = 1;     /* channel adapter */
386 	nip->num_ports = ibdev->phys_port_cnt;
387 	/* This is already in network order */
388 	nip->sys_guid = ib_hfi1_sys_image_guid;
389 	nip->node_guid = ibdev->node_guid;
390 	nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
391 	nip->device_id = cpu_to_be16(dd->pcidev->device);
392 	nip->revision = cpu_to_be32(dd->minrev);
393 	nip->local_port_num = port;
394 	nip->vendor_id[0] = dd->oui1;
395 	nip->vendor_id[1] = dd->oui2;
396 	nip->vendor_id[2] = dd->oui3;
397 
398 	return reply((struct ib_mad_hdr *)smp);
399 }
400 
401 static void set_link_width_enabled(struct hfi1_pportdata *ppd, u32 w)
402 {
403 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_ENB, w);
404 }
405 
406 static void set_link_width_downgrade_enabled(struct hfi1_pportdata *ppd, u32 w)
407 {
408 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_DG_ENB, w);
409 }
410 
411 static void set_link_speed_enabled(struct hfi1_pportdata *ppd, u32 s)
412 {
413 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_SPD_ENB, s);
414 }
415 
416 static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
417 		      int mad_flags, __be64 mkey, __be32 dr_slid,
418 		      u8 return_path[], u8 hop_cnt)
419 {
420 	int valid_mkey = 0;
421 	int ret = 0;
422 
423 	/* Is the mkey in the process of expiring? */
424 	if (ibp->rvp.mkey_lease_timeout &&
425 	    time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
426 		/* Clear timeout and mkey protection field. */
427 		ibp->rvp.mkey_lease_timeout = 0;
428 		ibp->rvp.mkeyprot = 0;
429 	}
430 
431 	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
432 	    ibp->rvp.mkey == mkey)
433 		valid_mkey = 1;
434 
435 	/* Unset lease timeout on any valid Get/Set/TrapRepress */
436 	if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
437 	    (mad->method == IB_MGMT_METHOD_GET ||
438 	     mad->method == IB_MGMT_METHOD_SET ||
439 	     mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
440 		ibp->rvp.mkey_lease_timeout = 0;
441 
442 	if (!valid_mkey) {
443 		switch (mad->method) {
444 		case IB_MGMT_METHOD_GET:
445 			/* Bad mkey not a violation below level 2 */
446 			if (ibp->rvp.mkeyprot < 2)
447 				break;
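			/* FALLTHROUGH */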
448 		case IB_MGMT_METHOD_SET:
449 		case IB_MGMT_METHOD_TRAP_REPRESS:
450 			if (ibp->rvp.mkey_violations != 0xFFFF)
451 				++ibp->rvp.mkey_violations;
452 			if (!ibp->rvp.mkey_lease_timeout &&
453 			    ibp->rvp.mkey_lease_period)
454 				ibp->rvp.mkey_lease_timeout = jiffies +
455 					ibp->rvp.mkey_lease_period * HZ;
456 			/* Generate a trap notice. */
457 			bad_mkey(ibp, mad, mkey, dr_slid, return_path,
458 				 hop_cnt);
459 			ret = 1;
460 		}
461 	}
462 
463 	return ret;
464 }
465 
466 /*
467  * The SMA caches reads from LCB registers in case the LCB is unavailable.
468  * (The LCB is unavailable in certain link states, for example.)
469  */
470 struct lcb_datum {
471 	u32 off;
472 	u64 val;
473 };
474 
475 static struct lcb_datum lcb_cache[] = {
476 	{ DC_LCB_STS_ROUND_TRIP_LTP_CNT, 0 },
477 };
478 
479 static int write_lcb_cache(u32 off, u64 val)
480 {
481 	int i;
482 
483 	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
484 		if (lcb_cache[i].off == off) {
485 			lcb_cache[i].val = val;
486 			return 0;
487 		}
488 	}
489 
490 	pr_warn("%s bad offset 0x%x\n", __func__, off);
491 	return -1;
492 }
493 
494 static int read_lcb_cache(u32 off, u64 *val)
495 {
496 	int i;
497 
498 	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
499 		if (lcb_cache[i].off == off) {
500 			*val = lcb_cache[i].val;
501 			return 0;
502 		}
503 	}
504 
505 	pr_warn("%s bad offset 0x%x\n", __func__, off);
506 	return -1;
507 }
508 
509 void read_ltp_rtt(struct hfi1_devdata *dd)
510 {
511 	u64 reg;
512 
513 	if (read_lcb_csr(dd, DC_LCB_STS_ROUND_TRIP_LTP_CNT, &reg))
514 		dd_dev_err(dd, "%s: unable to read LTP RTT\n", __func__);
515 	else
516 		write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
517 }
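/*
 * Usage sketch for the cache above (illustrative only, not additional
 * driver code): a path that must not touch the LCB directly reads the
 * last value captured by read_ltp_rtt():
 *
 *	u64 rtt = 0;
 *
 *	read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &rtt);
 *
 * __subn_get_opa_portinfo() below uses exactly this read half when it
 * fills in replay_depth.wire.
 */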
518 
519 static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
520 				   struct ib_device *ibdev, u8 port,
521 				   u32 *resp_len)
522 {
523 	int i;
524 	struct hfi1_devdata *dd;
525 	struct hfi1_pportdata *ppd;
526 	struct hfi1_ibport *ibp;
527 	struct opa_port_info *pi = (struct opa_port_info *)data;
528 	u8 mtu;
529 	u8 credit_rate;
530 	u8 is_beaconing_active;
531 	u32 state;
532 	u32 num_ports = OPA_AM_NPORT(am);
533 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
534 	u32 buffer_units;
535 	u64 tmp = 0;
536 
537 	if (num_ports != 1) {
538 		smp->status |= IB_SMP_INVALID_FIELD;
539 		return reply((struct ib_mad_hdr *)smp);
540 	}
541 
542 	dd = dd_from_ibdev(ibdev);
543 	/* IB numbers ports from 1, hw from 0 */
544 	ppd = dd->pport + (port - 1);
545 	ibp = &ppd->ibport_data;
546 
547 	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
548 	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
549 		smp->status |= IB_SMP_INVALID_FIELD;
550 		return reply((struct ib_mad_hdr *)smp);
551 	}
552 
553 	pi->lid = cpu_to_be32(ppd->lid);
554 
555 	/* Only return the mkey if the protection field allows it. */
556 	if (!(smp->method == IB_MGMT_METHOD_GET &&
557 	      ibp->rvp.mkey != smp->mkey &&
558 	      ibp->rvp.mkeyprot == 1))
559 		pi->mkey = ibp->rvp.mkey;
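	/*
	 * In other words: a Get whose M_Key does not match while mkeyprot
	 * is 1 skips the assignment above, so the requester does not learn
	 * the real M_Key; mkeyprot levels 2 and 3 should not reach this
	 * point for a mismatched Get, since check_mkey() rejects such MADs.
	 */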
560 
561 	pi->subnet_prefix = ibp->rvp.gid_prefix;
562 	pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
563 	pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
564 	pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
565 	pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
566 	pi->sa_qp = cpu_to_be32(ppd->sa_qp);
567 
568 	pi->link_width.enabled = cpu_to_be16(ppd->link_width_enabled);
569 	pi->link_width.supported = cpu_to_be16(ppd->link_width_supported);
570 	pi->link_width.active = cpu_to_be16(ppd->link_width_active);
571 
572 	pi->link_width_downgrade.supported =
573 			cpu_to_be16(ppd->link_width_downgrade_supported);
574 	pi->link_width_downgrade.enabled =
575 			cpu_to_be16(ppd->link_width_downgrade_enabled);
576 	pi->link_width_downgrade.tx_active =
577 			cpu_to_be16(ppd->link_width_downgrade_tx_active);
578 	pi->link_width_downgrade.rx_active =
579 			cpu_to_be16(ppd->link_width_downgrade_rx_active);
580 
581 	pi->link_speed.supported = cpu_to_be16(ppd->link_speed_supported);
582 	pi->link_speed.active = cpu_to_be16(ppd->link_speed_active);
583 	pi->link_speed.enabled = cpu_to_be16(ppd->link_speed_enabled);
584 
585 	state = driver_lstate(ppd);
586 
587 	if (start_of_sm_config && (state == IB_PORT_INIT))
588 		ppd->is_sm_config_started = 1;
589 
590 	pi->port_phys_conf = (ppd->port_type & 0xf);
591 
592 	pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
593 	pi->port_states.ledenable_offlinereason |=
594 		ppd->is_sm_config_started << 5;
595 	/*
596 	 * This pairs with the memory barrier in hfi1_start_led_override to
597 	 * ensure that we read the correct state of LED beaconing represented
598 	 * by led_override_timer_active
599 	 */
600 	smp_rmb();
601 	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
602 	pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
603 	pi->port_states.ledenable_offlinereason |=
604 		ppd->offline_disabled_reason;
605 
606 	pi->port_states.portphysstate_portstate =
607 		(hfi1_ibphys_portstate(ppd) << 4) | state;
608 
609 	pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
610 
611 	memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
612 	for (i = 0; i < ppd->vls_supported; i++) {
613 		mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
614 		if ((i % 2) == 0)
615 			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
616 		else
617 			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
618 	}
619 	/* don't forget VL 15 */
620 	mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
621 	pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
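	/*
	 * pvlx_to_mtu[] holds two VLs per byte: the even-numbered VL in the
	 * high nibble and the odd-numbered VL in the low nibble, so VL0/VL1
	 * share byte 0 and VL15 lands in the low nibble of byte 7 above.
	 */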
622 	pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
623 	pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
624 	pi->partenforce_filterraw |=
625 		(ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
626 	if (ppd->part_enforce & HFI1_PART_ENFORCE_IN)
627 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
628 	if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
629 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
630 	pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
631 	/* P_KeyViolations are counted by hardware. */
632 	pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
633 	pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
634 
635 	pi->vl.cap = ppd->vls_supported;
636 	pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
637 	pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
638 	pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);
639 
640 	pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;
641 
642 	pi->port_link_mode  = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
643 					  OPA_PORT_LINK_MODE_OPA << 5 |
644 					  OPA_PORT_LINK_MODE_OPA);
645 
646 	pi->port_ltp_crc_mode = cpu_to_be16(ppd->port_ltp_crc_mode);
647 
648 	pi->port_mode = cpu_to_be16(
649 				ppd->is_active_optimize_enabled ?
650 					OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);
651 
652 	pi->port_packet_format.supported =
653 		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
654 	pi->port_packet_format.enabled =
655 		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
656 
657 	/* flit_control.interleave layout (OPA V1, version .76):
658 	 * bits		use
659 	 * ----		---
660 	 * 2		res
661 	 * 2		DistanceSupported
662 	 * 2		DistanceEnabled
663 	 * 5		MaxNestLevelTxEnabled
664 	 * 5		MaxNestLevelRxSupported
665 	 *
666 	 * HFI supports only "distance mode 1" (see OPA V1, version .76,
667 	 * section 9.6.2), so set DistanceSupported, DistanceEnabled
668 	 * to 0x1.
669 	 */
670 	pi->flit_control.interleave = cpu_to_be16(0x1400);
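	/*
	 * Reading the field list above MSB-first, 0x1400 sets bits 13:12
	 * (DistanceSupported) to 1 and bits 11:10 (DistanceEnabled) to 1,
	 * leaving both Max*Level fields at 0.
	 */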
671 
672 	pi->link_down_reason = ppd->local_link_down_reason.sma;
673 	pi->neigh_link_down_reason = ppd->neigh_link_down_reason.sma;
674 	pi->port_error_action = cpu_to_be32(ppd->port_error_action);
675 	pi->mtucap = mtu_to_enum(hfi1_max_mtu, IB_MTU_4096);
676 
677 	/* 32.768 usec. response time (guessing) */
678 	pi->resptimevalue = 3;
679 
680 	pi->local_port_num = port;
681 
682 	/* buffer info for FM */
683 	pi->overall_buffer_space = cpu_to_be16(dd->link_credits);
684 
685 	pi->neigh_node_guid = cpu_to_be64(ppd->neighbor_guid);
686 	pi->neigh_port_num = ppd->neighbor_port_number;
687 	pi->port_neigh_mode =
688 		(ppd->neighbor_type & OPA_PI_MASK_NEIGH_NODE_TYPE) |
689 		(ppd->mgmt_allowed ? OPA_PI_MASK_NEIGH_MGMT_ALLOWED : 0) |
690 		(ppd->neighbor_fm_security ?
691 			OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS : 0);
692 
693 	/* HFIs shall always return VL15 credits to their
694 	 * neighbor in a timely manner, without any credit return pacing.
695 	 */
696 	credit_rate = 0;
697 	buffer_units  = (dd->vau) & OPA_PI_MASK_BUF_UNIT_BUF_ALLOC;
698 	buffer_units |= (dd->vcu << 3) & OPA_PI_MASK_BUF_UNIT_CREDIT_ACK;
699 	buffer_units |= (credit_rate << 6) &
700 				OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE;
701 	buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
702 	pi->buffer_units = cpu_to_be32(buffer_units);
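	/*
	 * Packing sketch with made-up values: vau = 3, vcu = 2, vl15_init =
	 * 0x20 and credit_rate = 0 would pack to
	 * 3 | (2 << 3) | (0 << 6) | (0x20 << 11) = 0x10013, assuming none
	 * of the fields are truncated by the OPA_PI_MASK_BUF_UNIT_* masks.
	 */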
703 
704 	pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported);
705 
706 	/* HFI supports a replay buffer 128 LTPs in size */
707 	pi->replay_depth.buffer = 0x80;
708 	/* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
709 	read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);
710 
711 	/*
712 	 * this counter is 16 bits wide, but the replay_depth.wire
713 	 * variable is only 8 bits
714 	 */
715 	if (tmp > 0xff)
716 		tmp = 0xff;
717 	pi->replay_depth.wire = tmp;
718 
719 	if (resp_len)
720 		*resp_len += sizeof(struct opa_port_info);
721 
722 	return reply((struct ib_mad_hdr *)smp);
723 }
724 
725 /**
726  * get_pkeys - return the PKEY table
727  * @dd: the hfi1_ib device
728  * @port: the IB port number
729  * @pkeys: the pkey table is placed here
730  */
731 static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
732 {
733 	struct hfi1_pportdata *ppd = dd->pport + port - 1;
734 
735 	memcpy(pkeys, ppd->pkeys, sizeof(ppd->pkeys));
736 
737 	return 0;
738 }
739 
740 static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
741 				    struct ib_device *ibdev, u8 port,
742 				    u32 *resp_len)
743 {
744 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
745 	u32 n_blocks_req = OPA_AM_NBLK(am);
746 	u32 start_block = am & 0x7ff;
747 	__be16 *p;
748 	u16 *q;
749 	int i;
750 	u16 n_blocks_avail;
751 	unsigned npkeys = hfi1_get_npkeys(dd);
752 	size_t size;
753 
754 	if (n_blocks_req == 0) {
755 		pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
756 			port, start_block, n_blocks_req);
757 		smp->status |= IB_SMP_INVALID_FIELD;
758 		return reply((struct ib_mad_hdr *)smp);
759 	}
760 
761 	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
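	/*
	 * Example with a hypothetical pkey count: if hfi1_get_npkeys()
	 * returned 16, n_blocks_avail would be 16 / 32 + 1 = 1, so only a
	 * request for block 0 with n_blocks_req == 1 passes the check below.
	 */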
762 
763 	size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
764 
765 	if (start_block + n_blocks_req > n_blocks_avail ||
766 	    n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
767 		pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; "
768 			"avail 0x%x; blk/smp 0x%lx\n",
769 			start_block, n_blocks_req, n_blocks_avail,
770 			OPA_NUM_PKEY_BLOCKS_PER_SMP);
771 		smp->status |= IB_SMP_INVALID_FIELD;
772 		return reply((struct ib_mad_hdr *)smp);
773 	}
774 
775 	p = (__be16 *)data;
776 	q = (u16 *)data;
777 	/* get the real pkeys if we are requesting the first block */
778 	if (start_block == 0) {
779 		get_pkeys(dd, port, q);
780 		for (i = 0; i < npkeys; i++)
781 			p[i] = cpu_to_be16(q[i]);
782 		if (resp_len)
783 			*resp_len += size;
784 	} else {
785 		smp->status |= IB_SMP_INVALID_FIELD;
786 	}
787 	return reply((struct ib_mad_hdr *)smp);
788 }
789 
790 enum {
791 	HFI_TRANSITION_DISALLOWED,
792 	HFI_TRANSITION_IGNORED,
793 	HFI_TRANSITION_ALLOWED,
794 	HFI_TRANSITION_UNDEFINED,
795 };
796 
797 /*
798  * Use shortened names to improve readability of
799  * {logical,physical}_state_transitions
800  */
801 enum {
802 	__D = HFI_TRANSITION_DISALLOWED,
803 	__I = HFI_TRANSITION_IGNORED,
804 	__A = HFI_TRANSITION_ALLOWED,
805 	__U = HFI_TRANSITION_UNDEFINED,
806 };
807 
808 /*
809  * IB_PORTPHYSSTATE_POLLING (2) through OPA_PORTPHYSSTATE_MAX (11) are
810  * represented in physical_state_transitions.
811  */
812 #define __N_PHYSTATES (OPA_PORTPHYSSTATE_MAX - IB_PORTPHYSSTATE_POLLING + 1)
813 
814 /*
815  * Within physical_state_transitions, rows represent "old" states,
816  * columns "new" states, and physical_state_transitions.allowed[old][new]
817  * indicates if the transition from old state to new state is legal (see
818  * OPAg1v1, Table 6-4).
819  */
820 static const struct {
821 	u8 allowed[__N_PHYSTATES][__N_PHYSTATES];
822 } physical_state_transitions = {
823 	{
824 		/* 2    3    4    5    6    7    8    9   10   11 */
825 	/* 2 */	{ __A, __A, __D, __D, __D, __D, __D, __D, __D, __D },
826 	/* 3 */	{ __A, __I, __D, __D, __D, __D, __D, __D, __D, __A },
827 	/* 4 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
828 	/* 5 */	{ __A, __A, __D, __I, __D, __D, __D, __D, __D, __D },
829 	/* 6 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
830 	/* 7 */	{ __D, __A, __D, __D, __D, __I, __D, __D, __D, __D },
831 	/* 8 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
832 	/* 9 */	{ __I, __A, __D, __D, __D, __D, __D, __I, __D, __D },
833 	/*10 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
834 	/*11 */	{ __D, __A, __D, __D, __D, __D, __D, __D, __D, __I },
835 	}
836 };
837 
838 /*
839  * IB_PORT_DOWN (1) through IB_PORT_ACTIVE_DEFER (5) are represented
840  * in logical_state_transitions.
841  */
842 
843 #define __N_LOGICAL_STATES (IB_PORT_ACTIVE_DEFER - IB_PORT_DOWN + 1)
844 
845 /*
846  * Within logical_state_transitions rows represent "old" states,
847  * columns "new" states, and logical_state_transitions.allowed[old][new]
848  * indicates if the transition from old state to new state is legal (see
849  * OPAg1v1, Table 9-12).
850  */
851 static const struct {
852 	u8 allowed[__N_LOGICAL_STATES][__N_LOGICAL_STATES];
853 } logical_state_transitions = {
854 	{
855 		/* 1    2    3    4    5 */
856 	/* 1 */	{ __I, __D, __D, __D, __U},
857 	/* 2 */	{ __D, __I, __A, __D, __U},
858 	/* 3 */	{ __D, __D, __I, __A, __U},
859 	/* 4 */	{ __D, __D, __I, __I, __U},
860 	/* 5 */	{ __U, __U, __U, __U, __U},
861 	}
862 };
863 
864 static int logical_transition_allowed(int old, int new)
865 {
866 	if (old < IB_PORT_NOP || old > IB_PORT_ACTIVE_DEFER ||
867 	    new < IB_PORT_NOP || new > IB_PORT_ACTIVE_DEFER) {
868 		pr_warn("invalid logical state(s) (old %d new %d)\n",
869 			old, new);
870 		return HFI_TRANSITION_UNDEFINED;
871 	}
872 
873 	if (new == IB_PORT_NOP)
874 		return HFI_TRANSITION_ALLOWED; /* always allowed */
875 
876 	/* adjust states for indexing into logical_state_transitions */
877 	old -= IB_PORT_DOWN;
878 	new -= IB_PORT_DOWN;
879 
880 	if (old < 0 || new < 0)
881 		return HFI_TRANSITION_UNDEFINED;
882 	return logical_state_transitions.allowed[old][new];
883 }
884 
885 static int physical_transition_allowed(int old, int new)
886 {
887 	if (old < IB_PORTPHYSSTATE_NOP || old > OPA_PORTPHYSSTATE_MAX ||
888 	    new < IB_PORTPHYSSTATE_NOP || new > OPA_PORTPHYSSTATE_MAX) {
889 		pr_warn("invalid physical state(s) (old %d new %d)\n",
890 			old, new);
891 		return HFI_TRANSITION_UNDEFINED;
892 	}
893 
894 	if (new == IB_PORTPHYSSTATE_NOP)
895 		return HFI_TRANSITION_ALLOWED; /* always allowed */
896 
897 	/* adjust states for indexing into physical_state_transitions */
898 	old -= IB_PORTPHYSSTATE_POLLING;
899 	new -= IB_PORTPHYSSTATE_POLLING;
900 
901 	if (old < 0 || new < 0)
902 		return HFI_TRANSITION_UNDEFINED;
903 	return physical_state_transitions.allowed[old][new];
904 }
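/*
 * Worked example for the lookups above: moving from Disabled (3) to
 * Polling (2) indexes physical_state_transitions.allowed[3 - 2][2 - 2],
 * which is __A (allowed); Polling (2) to Disabled (3) reads
 * allowed[0][1], also __A.  The logical table is consulted the same way
 * by logical_transition_allowed() after subtracting IB_PORT_DOWN.
 */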
905 
906 static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
907 					  u32 logical_new, u32 physical_new)
908 {
909 	u32 physical_old = driver_physical_state(ppd);
910 	u32 logical_old = driver_logical_state(ppd);
911 	int ret, logical_allowed, physical_allowed;
912 
913 	ret = logical_transition_allowed(logical_old, logical_new);
914 	logical_allowed = ret;
915 
916 	if (ret == HFI_TRANSITION_DISALLOWED ||
917 	    ret == HFI_TRANSITION_UNDEFINED) {
918 		pr_warn("invalid logical state transition %s -> %s\n",
919 			opa_lstate_name(logical_old),
920 			opa_lstate_name(logical_new));
921 		return ret;
922 	}
923 
924 	ret = physical_transition_allowed(physical_old, physical_new);
925 	physical_allowed = ret;
926 
927 	if (ret == HFI_TRANSITION_DISALLOWED ||
928 	    ret == HFI_TRANSITION_UNDEFINED) {
929 		pr_warn("invalid physical state transition %s -> %s\n",
930 			opa_pstate_name(physical_old),
931 			opa_pstate_name(physical_new));
932 		return ret;
933 	}
934 
935 	if (logical_allowed == HFI_TRANSITION_IGNORED &&
936 	    physical_allowed == HFI_TRANSITION_IGNORED)
937 		return HFI_TRANSITION_IGNORED;
938 
939 	/*
940 	 * A change request of Physical Port State from
941 	 * 'Offline' to 'Polling' should be ignored.
942 	 */
943 	if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
944 	    (physical_new == IB_PORTPHYSSTATE_POLLING))
945 		return HFI_TRANSITION_IGNORED;
946 
947 	/*
948 	 * Either physical_allowed or logical_allowed is
949 	 * HFI_TRANSITION_ALLOWED.
950 	 */
951 	return HFI_TRANSITION_ALLOWED;
952 }
953 
954 static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
955 			   u32 logical_state, u32 phys_state,
956 			   int suppress_idle_sma)
957 {
958 	struct hfi1_devdata *dd = ppd->dd;
959 	u32 link_state;
960 	int ret;
961 
962 	ret = port_states_transition_allowed(ppd, logical_state, phys_state);
963 	if (ret == HFI_TRANSITION_DISALLOWED ||
964 	    ret == HFI_TRANSITION_UNDEFINED) {
965 		/* error message emitted above */
966 		smp->status |= IB_SMP_INVALID_FIELD;
967 		return 0;
968 	}
969 
970 	if (ret == HFI_TRANSITION_IGNORED)
971 		return 0;
972 
973 	if ((phys_state != IB_PORTPHYSSTATE_NOP) &&
974 	    !(logical_state == IB_PORT_DOWN ||
975 	      logical_state == IB_PORT_NOP)) {
976 		pr_warn("SubnSet(OPA_PortInfo) port state invalid: logical_state 0x%x physical_state 0x%x\n",
977 			logical_state, phys_state);
978 		smp->status |= IB_SMP_INVALID_FIELD;
979 	}
980 
981 	/*
982 	 * Logical state changes are summarized in OPAv1g1 spec.,
983 	 * Table 9-12; physical state changes are summarized in
984 	 * OPAv1g1 spec., Table 6.4.
985 	 */
986 	switch (logical_state) {
987 	case IB_PORT_NOP:
988 		if (phys_state == IB_PORTPHYSSTATE_NOP)
989 			break;
990 		/* FALLTHROUGH */
991 	case IB_PORT_DOWN:
992 		if (phys_state == IB_PORTPHYSSTATE_NOP) {
993 			link_state = HLS_DN_DOWNDEF;
994 		} else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
995 			link_state = HLS_DN_POLL;
996 			set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
997 					     0, OPA_LINKDOWN_REASON_FM_BOUNCE);
998 		} else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
999 			link_state = HLS_DN_DISABLE;
1000 		} else {
1001 			pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
1002 				phys_state);
1003 			smp->status |= IB_SMP_INVALID_FIELD;
1004 			break;
1005 		}
1006 
1007 		if ((link_state == HLS_DN_POLL ||
1008 		     link_state == HLS_DN_DOWNDEF)) {
1009 			/*
1010 			 * Going to poll.  No matter what the current state,
1011 			 * always move offline first, then tune and start the
1012 			 * link.  This correctly handles a FM link bounce and
1013 			 * a link enable.  Going offline is a no-op if already
1014 			 * offline.
1015 			 */
1016 			set_link_state(ppd, HLS_DN_OFFLINE);
1017 			start_link(ppd);
1018 		} else {
1019 			set_link_state(ppd, link_state);
1020 		}
1021 		if (link_state == HLS_DN_DISABLE &&
1022 		    (ppd->offline_disabled_reason >
1023 		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
1024 		     ppd->offline_disabled_reason ==
1025 		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
1026 			ppd->offline_disabled_reason =
1027 			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
1028 		/*
1029 		 * Don't send a reply if the response would be sent
1030 		 * through the disabled port.
1031 		 */
1032 		if (link_state == HLS_DN_DISABLE && smp->hop_cnt)
1033 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
1034 		break;
1035 	case IB_PORT_ARMED:
1036 		ret = set_link_state(ppd, HLS_UP_ARMED);
1037 		if ((ret == 0) && (suppress_idle_sma == 0))
1038 			send_idle_sma(dd, SMA_IDLE_ARM);
1039 		break;
1040 	case IB_PORT_ACTIVE:
1041 		if (ppd->neighbor_normal) {
1042 			ret = set_link_state(ppd, HLS_UP_ACTIVE);
1043 			if (ret == 0)
1044 				send_idle_sma(dd, SMA_IDLE_ACTIVE);
1045 		} else {
1046 			pr_warn("SubnSet(OPA_PortInfo) Cannot move to Active with NeighborNormal 0\n");
1047 			smp->status |= IB_SMP_INVALID_FIELD;
1048 		}
1049 		break;
1050 	default:
1051 		pr_warn("SubnSet(OPA_PortInfo) invalid logical state 0x%x\n",
1052 			logical_state);
1053 		smp->status |= IB_SMP_INVALID_FIELD;
1054 	}
1055 
1056 	return 0;
1057 }
1058 
1059 /**
1060  * __subn_set_opa_portinfo - set port information
1061  * @smp: the incoming SM packet
1062  * @ibdev: the infiniband device
1063  * @port: the port on the device
1064  *
1065  */
1066 static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
1067 				   struct ib_device *ibdev, u8 port,
1068 				   u32 *resp_len)
1069 {
1070 	struct opa_port_info *pi = (struct opa_port_info *)data;
1071 	struct ib_event event;
1072 	struct hfi1_devdata *dd;
1073 	struct hfi1_pportdata *ppd;
1074 	struct hfi1_ibport *ibp;
1075 	u8 clientrereg;
1076 	unsigned long flags;
1077 	u32 smlid, opa_lid; /* tmp vars to hold LID values */
1078 	u16 lid;
1079 	u8 ls_old, ls_new, ps_new;
1080 	u8 vls;
1081 	u8 msl;
1082 	u8 crc_enabled;
1083 	u16 lse, lwe, mtu;
1084 	u32 num_ports = OPA_AM_NPORT(am);
1085 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1086 	int ret, i, invalid = 0, call_set_mtu = 0;
1087 	int call_link_downgrade_policy = 0;
1088 
1089 	if (num_ports != 1) {
1090 		smp->status |= IB_SMP_INVALID_FIELD;
1091 		return reply((struct ib_mad_hdr *)smp);
1092 	}
1093 
1094 	opa_lid = be32_to_cpu(pi->lid);
1095 	if (opa_lid & 0xFFFF0000) {
1096 		pr_warn("OPA_PortInfo lid out of range: %X\n", opa_lid);
1097 		smp->status |= IB_SMP_INVALID_FIELD;
1098 		goto get_only;
1099 	}
1100 
1101 	lid = (u16)(opa_lid & 0x0000FFFF);
1102 
1103 	smlid = be32_to_cpu(pi->sm_lid);
1104 	if (smlid & 0xFFFF0000) {
1105 		pr_warn("OPA_PortInfo SM lid out of range: %X\n", smlid);
1106 		smp->status |= IB_SMP_INVALID_FIELD;
1107 		goto get_only;
1108 	}
1109 	smlid &= 0x0000FFFF;
1110 
1111 	clientrereg = (pi->clientrereg_subnettimeout &
1112 			OPA_PI_MASK_CLIENT_REREGISTER);
1113 
1114 	dd = dd_from_ibdev(ibdev);
1115 	/* IB numbers ports from 1, hw from 0 */
1116 	ppd = dd->pport + (port - 1);
1117 	ibp = &ppd->ibport_data;
1118 	event.device = ibdev;
1119 	event.element.port_num = port;
1120 
1121 	ls_old = driver_lstate(ppd);
1122 
1123 	ibp->rvp.mkey = pi->mkey;
1124 	ibp->rvp.gid_prefix = pi->subnet_prefix;
1125 	ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
1126 
1127 	/* Must be a valid unicast LID address. */
1128 	if ((lid == 0 && ls_old > IB_PORT_INIT) ||
1129 	    lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
1130 		smp->status |= IB_SMP_INVALID_FIELD;
1131 		pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
1132 			lid);
1133 	} else if (ppd->lid != lid ||
1134 		 ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC)) {
1135 		if (ppd->lid != lid)
1136 			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LID_CHANGE_BIT);
1137 		if (ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC))
1138 			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LMC_CHANGE_BIT);
1139 		hfi1_set_lid(ppd, lid, pi->mkeyprotect_lmc & OPA_PI_MASK_LMC);
1140 		event.event = IB_EVENT_LID_CHANGE;
1141 		ib_dispatch_event(&event);
1142 	}
1143 
1144 	msl = pi->smsl & OPA_PI_MASK_SMSL;
1145 	if (pi->partenforce_filterraw & OPA_PI_MASK_LINKINIT_REASON)
1146 		ppd->linkinit_reason =
1147 			(pi->partenforce_filterraw &
1148 			 OPA_PI_MASK_LINKINIT_REASON);
1149 	/* enable/disable SW pkey checking as per FM control */
1150 	if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_IN)
1151 		ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
1152 	else
1153 		ppd->part_enforce &= ~HFI1_PART_ENFORCE_IN;
1154 
1155 	if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_OUT)
1156 		ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
1157 	else
1158 		ppd->part_enforce &= ~HFI1_PART_ENFORCE_OUT;
1159 
1160 	/* Must be a valid unicast LID address. */
1161 	if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
1162 	    smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
1163 		smp->status |= IB_SMP_INVALID_FIELD;
1164 		pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
1165 	} else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
1166 		pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
1167 		spin_lock_irqsave(&ibp->rvp.lock, flags);
1168 		if (ibp->rvp.sm_ah) {
1169 			if (smlid != ibp->rvp.sm_lid)
1170 				ibp->rvp.sm_ah->attr.dlid = smlid;
1171 			if (msl != ibp->rvp.sm_sl)
1172 				ibp->rvp.sm_ah->attr.sl = msl;
1173 		}
1174 		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
1175 		if (smlid != ibp->rvp.sm_lid)
1176 			ibp->rvp.sm_lid = smlid;
1177 		if (msl != ibp->rvp.sm_sl)
1178 			ibp->rvp.sm_sl = msl;
1179 		event.event = IB_EVENT_SM_CHANGE;
1180 		ib_dispatch_event(&event);
1181 	}
1182 
1183 	if (pi->link_down_reason == 0) {
1184 		ppd->local_link_down_reason.sma = 0;
1185 		ppd->local_link_down_reason.latest = 0;
1186 	}
1187 
1188 	if (pi->neigh_link_down_reason == 0) {
1189 		ppd->neigh_link_down_reason.sma = 0;
1190 		ppd->neigh_link_down_reason.latest = 0;
1191 	}
1192 
1193 	ppd->sm_trap_qp = be32_to_cpu(pi->sm_trap_qp);
1194 	ppd->sa_qp = be32_to_cpu(pi->sa_qp);
1195 
1196 	ppd->port_error_action = be32_to_cpu(pi->port_error_action);
1197 	lwe = be16_to_cpu(pi->link_width.enabled);
1198 	if (lwe) {
1199 		if (lwe == OPA_LINK_WIDTH_RESET ||
1200 		    lwe == OPA_LINK_WIDTH_RESET_OLD)
1201 			set_link_width_enabled(ppd, ppd->link_width_supported);
1202 		else if ((lwe & ~ppd->link_width_supported) == 0)
1203 			set_link_width_enabled(ppd, lwe);
1204 		else
1205 			smp->status |= IB_SMP_INVALID_FIELD;
1206 	}
1207 	lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
1208 	/* LWD.E is always applied - 0 means "disabled" */
1209 	if (lwe == OPA_LINK_WIDTH_RESET ||
1210 	    lwe == OPA_LINK_WIDTH_RESET_OLD) {
1211 		set_link_width_downgrade_enabled(ppd,
1212 						 ppd->
1213 						 link_width_downgrade_supported
1214 						 );
1215 	} else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
1216 		/* only set and apply if something changed */
1217 		if (lwe != ppd->link_width_downgrade_enabled) {
1218 			set_link_width_downgrade_enabled(ppd, lwe);
1219 			call_link_downgrade_policy = 1;
1220 		}
1221 	} else {
1222 		smp->status |= IB_SMP_INVALID_FIELD;
1223 	}
1224 	lse = be16_to_cpu(pi->link_speed.enabled);
1225 	if (lse) {
1226 		if (lse & be16_to_cpu(pi->link_speed.supported))
1227 			set_link_speed_enabled(ppd, lse);
1228 		else
1229 			smp->status |= IB_SMP_INVALID_FIELD;
1230 	}
1231 
1232 	ibp->rvp.mkeyprot =
1233 		(pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
1234 	ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
1235 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
1236 				    ibp->rvp.vl_high_limit);
1237 
1238 	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
1239 	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
1240 		smp->status |= IB_SMP_INVALID_FIELD;
1241 		return reply((struct ib_mad_hdr *)smp);
1242 	}
1243 	for (i = 0; i < ppd->vls_supported; i++) {
1244 		if ((i % 2) == 0)
1245 			mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
1246 					   4) & 0xF);
1247 		else
1248 			mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
1249 					  0xF);
1250 		if (mtu == 0xffff) {
1251 			pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
1252 				mtu,
1253 				(pi->neigh_mtu.pvlx_to_mtu[0] >> 4) & 0xF);
1254 			smp->status |= IB_SMP_INVALID_FIELD;
1255 			mtu = hfi1_max_mtu; /* use a valid MTU */
1256 		}
1257 		if (dd->vld[i].mtu != mtu) {
1258 			dd_dev_info(dd,
1259 				    "MTU change on vl %d from %d to %d\n",
1260 				    i, dd->vld[i].mtu, mtu);
1261 			dd->vld[i].mtu = mtu;
1262 			call_set_mtu++;
1263 		}
1264 	}
1265 	/* Per the OPAv1 spec, VL15 must be supported and configured
1266 	 * to operate with an MTU of 2048 or larger.
1267 	 */
1268 	mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
1269 	if (mtu < 2048 || mtu == 0xffff)
1270 		mtu = 2048;
1271 	if (dd->vld[15].mtu != mtu) {
1272 		dd_dev_info(dd,
1273 			    "MTU change on vl 15 from %d to %d\n",
1274 			    dd->vld[15].mtu, mtu);
1275 		dd->vld[15].mtu = mtu;
1276 		call_set_mtu++;
1277 	}
1278 	if (call_set_mtu)
1279 		set_mtu(ppd);
1280 
1281 	/* Set operational VLs */
1282 	vls = pi->operational_vls & OPA_PI_MASK_OPERATIONAL_VL;
1283 	if (vls) {
1284 		if (vls > ppd->vls_supported) {
1285 			pr_warn("SubnSet(OPA_PortInfo) VL's supported invalid %d\n",
1286 				pi->operational_vls);
1287 			smp->status |= IB_SMP_INVALID_FIELD;
1288 		} else {
1289 			if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
1290 					    vls) == -EINVAL)
1291 				smp->status |= IB_SMP_INVALID_FIELD;
1292 		}
1293 	}
1294 
1295 	if (pi->mkey_violations == 0)
1296 		ibp->rvp.mkey_violations = 0;
1297 
1298 	if (pi->pkey_violations == 0)
1299 		ibp->rvp.pkey_violations = 0;
1300 
1301 	if (pi->qkey_violations == 0)
1302 		ibp->rvp.qkey_violations = 0;
1303 
1304 	ibp->rvp.subnet_timeout =
1305 		pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
1306 
1307 	crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
1308 	crc_enabled >>= 4;
1309 	crc_enabled &= 0xf;
1310 
1311 	if (crc_enabled != 0)
1312 		ppd->port_crc_mode_enabled = port_ltp_to_cap(crc_enabled);
1313 
1314 	ppd->is_active_optimize_enabled =
1315 			!!(be16_to_cpu(pi->port_mode)
1316 					& OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE);
1317 
1318 	ls_new = pi->port_states.portphysstate_portstate &
1319 			OPA_PI_MASK_PORT_STATE;
1320 	ps_new = (pi->port_states.portphysstate_portstate &
1321 			OPA_PI_MASK_PORT_PHYSICAL_STATE) >> 4;
1322 
1323 	if (ls_old == IB_PORT_INIT) {
1324 		if (start_of_sm_config) {
1325 			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1326 				ppd->is_sm_config_started = 1;
1327 		} else if (ls_new == IB_PORT_ARMED) {
1328 			if (ppd->is_sm_config_started == 0)
1329 				invalid = 1;
1330 		}
1331 	}
1332 
1333 	/* Handle CLIENT_REREGISTER event b/c SM asked us for it */
1334 	if (clientrereg) {
1335 		event.event = IB_EVENT_CLIENT_REREGISTER;
1336 		ib_dispatch_event(&event);
1337 	}
1338 
1339 	/*
1340 	 * Do the port state change now that the other link parameters
1341 	 * have been set.
1342 	 * Changing the port physical state only makes sense if the link
1343 	 * is down or is being set to down.
1344 	 */
1345 
1346 	ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
1347 	if (ret)
1348 		return ret;
1349 
1350 	ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
1351 
1352 	/* restore re-reg bit per o14-12.2.1 */
1353 	pi->clientrereg_subnettimeout |= clientrereg;
1354 
1355 	/*
1356 	 * Apply the new link downgrade policy.  This may result in a link
1357 	 * bounce.  Do this after everything else so things are settled.
1358 	 * Possible problem: if setting the port state above fails, then
1359 	 * the policy change is not applied.
1360 	 */
1361 	if (call_link_downgrade_policy)
1362 		apply_link_downgrade_policy(ppd, 0);
1363 
1364 	return ret;
1365 
1366 get_only:
1367 	return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
1368 }
1369 
1370 /**
1371  * set_pkeys - set the PKEY table for ctxt 0
1372  * @dd: the hfi1_ib device
1373  * @port: the IB port number
1374  * @pkeys: the PKEY table
1375  */
1376 static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
1377 {
1378 	struct hfi1_pportdata *ppd;
1379 	int i;
1380 	int changed = 0;
1381 	int update_includes_mgmt_partition = 0;
1382 
1383 	/*
1384 	 * IB port one/two always maps to context zero/one,
1385 	 * always a kernel context, so no locking is needed.
1386 	 * If we get here with ppd setup, no need to check
1387 	 * that rcd is valid.
1388 	 */
1389 	ppd = dd->pport + (port - 1);
1390 	/*
1391 	 * If the update does not include the management pkey, don't do it.
1392 	 */
1393 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1394 		if (pkeys[i] == LIM_MGMT_P_KEY) {
1395 			update_includes_mgmt_partition = 1;
1396 			break;
1397 		}
1398 	}
1399 
1400 	if (!update_includes_mgmt_partition)
1401 		return 1;
1402 
1403 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1404 		u16 key = pkeys[i];
1405 		u16 okey = ppd->pkeys[i];
1406 
1407 		if (key == okey)
1408 			continue;
1409 		/*
1410 		 * The SM gives us the complete PKey table. We have
1411 		 * to ensure that we put the PKeys in the matching
1412 		 * slots.
1413 		 */
1414 		ppd->pkeys[i] = key;
1415 		changed = 1;
1416 	}
1417 
1418 	if (changed) {
1419 		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
1420 		hfi1_event_pkey_change(dd, port);
1421 	}
1422 
1423 	return 0;
1424 }
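/*
 * Example: if the SM's table still contains LIM_MGMT_P_KEY and differs
 * from ppd->pkeys[] only in slot 3, only ppd->pkeys[3] is rewritten, and
 * the single HFI1_IB_CFG_PKEYS update plus the IB_EVENT_PKEY_CHANGE
 * dispatch above happen once for the whole table.
 */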
1425 
1426 static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
1427 				    struct ib_device *ibdev, u8 port,
1428 				    u32 *resp_len)
1429 {
1430 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1431 	u32 n_blocks_sent = OPA_AM_NBLK(am);
1432 	u32 start_block = am & 0x7ff;
1433 	u16 *p = (u16 *)data;
1434 	__be16 *q = (__be16 *)data;
1435 	int i;
1436 	u16 n_blocks_avail;
1437 	unsigned npkeys = hfi1_get_npkeys(dd);
1438 
1439 	if (n_blocks_sent == 0) {
1440 		pr_warn("OPA Set PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
1441 			port, start_block, n_blocks_sent);
1442 		smp->status |= IB_SMP_INVALID_FIELD;
1443 		return reply((struct ib_mad_hdr *)smp);
1444 	}
1445 
1446 	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
1447 
1448 	if (start_block + n_blocks_sent > n_blocks_avail ||
1449 	    n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
1450 		pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
1451 			start_block, n_blocks_sent, n_blocks_avail,
1452 			OPA_NUM_PKEY_BLOCKS_PER_SMP);
1453 		smp->status |= IB_SMP_INVALID_FIELD;
1454 		return reply((struct ib_mad_hdr *)smp);
1455 	}
1456 
1457 	for (i = 0; i < n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE; i++)
1458 		p[i] = be16_to_cpu(q[i]);
1459 
1460 	if (start_block == 0 && set_pkeys(dd, port, p) != 0) {
1461 		smp->status |= IB_SMP_INVALID_FIELD;
1462 		return reply((struct ib_mad_hdr *)smp);
1463 	}
1464 
1465 	return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len);
1466 }
1467 
1468 static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1469 {
1470 	u64 *val = data;
1471 
1472 	*val++ = read_csr(dd, SEND_SC2VLT0);
1473 	*val++ = read_csr(dd, SEND_SC2VLT1);
1474 	*val++ = read_csr(dd, SEND_SC2VLT2);
1475 	*val++ = read_csr(dd, SEND_SC2VLT3);
1476 	return 0;
1477 }
1478 
1479 #define ILLEGAL_VL 12
1480 /*
1481  * filter_sc2vlt changes mappings to VL15 to ILLEGAL_VL (except
1482  * for SC15, which must map to VL15). If we don't remap things this
1483  * way it is possible for VL15 counters to increment when we try to
1484  * send on a SC which is mapped to an invalid VL.
1485  */
1486 static void filter_sc2vlt(void *data)
1487 {
1488 	int i;
1489 	u8 *pd = data;
1490 
1491 	for (i = 0; i < OPA_MAX_SCS; i++) {
1492 		if (i == 15)
1493 			continue;
1494 		if ((pd[i] & 0x1f) == 0xf)
1495 			pd[i] = ILLEGAL_VL;
1496 	}
1497 }
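/*
 * For example, an FM-supplied table with SC7 -> VL15 (pd[7] == 0xf) is
 * rewritten so that pd[7] == ILLEGAL_VL (12), while the SC15 entry is
 * skipped and keeps its architected VL15 mapping.
 */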
1498 
1499 static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1500 {
1501 	u64 *val = data;
1502 
1503 	filter_sc2vlt(data);
1504 
1505 	write_csr(dd, SEND_SC2VLT0, *val++);
1506 	write_csr(dd, SEND_SC2VLT1, *val++);
1507 	write_csr(dd, SEND_SC2VLT2, *val++);
1508 	write_csr(dd, SEND_SC2VLT3, *val++);
1509 	write_seqlock_irq(&dd->sc2vl_lock);
1510 	memcpy(dd->sc2vl, data, sizeof(dd->sc2vl));
1511 	write_sequnlock_irq(&dd->sc2vl_lock);
1512 	return 0;
1513 }
1514 
1515 static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1516 				   struct ib_device *ibdev, u8 port,
1517 				   u32 *resp_len)
1518 {
1519 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1520 	u8 *p = data;
1521 	size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
1522 	unsigned i;
1523 
1524 	if (am) {
1525 		smp->status |= IB_SMP_INVALID_FIELD;
1526 		return reply((struct ib_mad_hdr *)smp);
1527 	}
1528 
1529 	for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++)
1530 		*p++ = ibp->sl_to_sc[i];
1531 
1532 	if (resp_len)
1533 		*resp_len += size;
1534 
1535 	return reply((struct ib_mad_hdr *)smp);
1536 }
1537 
1538 static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1539 				   struct ib_device *ibdev, u8 port,
1540 				   u32 *resp_len)
1541 {
1542 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1543 	u8 *p = data;
1544 	int i;
1545 	u8 sc;
1546 
1547 	if (am) {
1548 		smp->status |= IB_SMP_INVALID_FIELD;
1549 		return reply((struct ib_mad_hdr *)smp);
1550 	}
1551 
1552 	for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++) {
1553 		sc = *p++;
1554 		if (ibp->sl_to_sc[i] != sc) {
1555 			ibp->sl_to_sc[i] = sc;
1556 
1557 			/* Put all stale qps into error state */
1558 			hfi1_error_port_qps(ibp, i);
1559 		}
1560 	}
1561 
1562 	return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len);
1563 }
1564 
1565 static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1566 				   struct ib_device *ibdev, u8 port,
1567 				   u32 *resp_len)
1568 {
1569 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1570 	u8 *p = data;
1571 	size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
1572 	unsigned i;
1573 
1574 	if (am) {
1575 		smp->status |= IB_SMP_INVALID_FIELD;
1576 		return reply((struct ib_mad_hdr *)smp);
1577 	}
1578 
1579 	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1580 		*p++ = ibp->sc_to_sl[i];
1581 
1582 	if (resp_len)
1583 		*resp_len += size;
1584 
1585 	return reply((struct ib_mad_hdr *)smp);
1586 }
1587 
1588 static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1589 				   struct ib_device *ibdev, u8 port,
1590 				   u32 *resp_len)
1591 {
1592 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1593 	u8 *p = data;
1594 	int i;
1595 
1596 	if (am) {
1597 		smp->status |= IB_SMP_INVALID_FIELD;
1598 		return reply((struct ib_mad_hdr *)smp);
1599 	}
1600 
1601 	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1602 		ibp->sc_to_sl[i] = *p++;
1603 
1604 	return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len);
1605 }
1606 
1607 static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1608 				    struct ib_device *ibdev, u8 port,
1609 				    u32 *resp_len)
1610 {
1611 	u32 n_blocks = OPA_AM_NBLK(am);
1612 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1613 	void *vp = (void *)data;
1614 	size_t size = 4 * sizeof(u64);
1615 
1616 	if (n_blocks != 1) {
1617 		smp->status |= IB_SMP_INVALID_FIELD;
1618 		return reply((struct ib_mad_hdr *)smp);
1619 	}
1620 
1621 	get_sc2vlt_tables(dd, vp);
1622 
1623 	if (resp_len)
1624 		*resp_len += size;
1625 
1626 	return reply((struct ib_mad_hdr *)smp);
1627 }
1628 
1629 static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1630 				    struct ib_device *ibdev, u8 port,
1631 				    u32 *resp_len)
1632 {
1633 	u32 n_blocks = OPA_AM_NBLK(am);
1634 	int async_update = OPA_AM_ASYNC(am);
1635 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1636 	void *vp = (void *)data;
1637 	struct hfi1_pportdata *ppd;
1638 	int lstate;
1639 
1640 	if (n_blocks != 1 || async_update) {
1641 		smp->status |= IB_SMP_INVALID_FIELD;
1642 		return reply((struct ib_mad_hdr *)smp);
1643 	}
1644 
1645 	/* IB numbers ports from 1, hw from 0 */
1646 	ppd = dd->pport + (port - 1);
1647 	lstate = driver_lstate(ppd);
1648 	/*
1649 	 * it's known that async_update is 0 by this point, but include
1650 	 * the explicit check for clarity
1651 	 */
1652 	if (!async_update &&
1653 	    (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
1654 		smp->status |= IB_SMP_INVALID_FIELD;
1655 		return reply((struct ib_mad_hdr *)smp);
1656 	}
1657 
1658 	set_sc2vlt_tables(dd, vp);
1659 
1660 	return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len);
1661 }
1662 
1663 static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1664 				     struct ib_device *ibdev, u8 port,
1665 				     u32 *resp_len)
1666 {
1667 	u32 n_blocks = OPA_AM_NPORT(am);
1668 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1669 	struct hfi1_pportdata *ppd;
1670 	void *vp = (void *)data;
1671 	int size;
1672 
1673 	if (n_blocks != 1) {
1674 		smp->status |= IB_SMP_INVALID_FIELD;
1675 		return reply((struct ib_mad_hdr *)smp);
1676 	}
1677 
1678 	ppd = dd->pport + (port - 1);
1679 
1680 	size = fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
1681 
1682 	if (resp_len)
1683 		*resp_len += size;
1684 
1685 	return reply((struct ib_mad_hdr *)smp);
1686 }
1687 
1688 static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1689 				     struct ib_device *ibdev, u8 port,
1690 				     u32 *resp_len)
1691 {
1692 	u32 n_blocks = OPA_AM_NPORT(am);
1693 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1694 	struct hfi1_pportdata *ppd;
1695 	void *vp = (void *)data;
1696 	int lstate;
1697 
1698 	if (n_blocks != 1) {
1699 		smp->status |= IB_SMP_INVALID_FIELD;
1700 		return reply((struct ib_mad_hdr *)smp);
1701 	}
1702 
1703 	/* IB numbers ports from 1, hw from 0 */
1704 	ppd = dd->pport + (port - 1);
1705 	lstate = driver_lstate(ppd);
1706 	if (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE) {
1707 		smp->status |= IB_SMP_INVALID_FIELD;
1708 		return reply((struct ib_mad_hdr *)smp);
1709 	}
1712 
1713 	fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
1714 
1715 	return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
1716 					 resp_len);
1717 }
1718 
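/*
 * SubnGet(PortStateInfo): report the logical/physical port state and
 * the active link width downgrade values.  A StartOfSMConfig request
 * while the port is in Init latches is_sm_config_started, which is
 * reflected in bit 5 of ledenable_offlinereason.
 */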
1719 static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
1720 			      struct ib_device *ibdev, u8 port,
1721 			      u32 *resp_len)
1722 {
1723 	u32 nports = OPA_AM_NPORT(am);
1724 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1725 	u32 lstate;
1726 	struct hfi1_ibport *ibp;
1727 	struct hfi1_pportdata *ppd;
1728 	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
1729 
1730 	if (nports != 1) {
1731 		smp->status |= IB_SMP_INVALID_FIELD;
1732 		return reply((struct ib_mad_hdr *)smp);
1733 	}
1734 
1735 	ibp = to_iport(ibdev, port);
1736 	ppd = ppd_from_ibp(ibp);
1737 
1738 	lstate = driver_lstate(ppd);
1739 
1740 	if (start_of_sm_config && (lstate == IB_PORT_INIT))
1741 		ppd->is_sm_config_started = 1;
1742 
1743 	psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
1744 	psi->port_states.ledenable_offlinereason |=
1745 		ppd->is_sm_config_started << 5;
1746 	psi->port_states.ledenable_offlinereason |=
1747 		ppd->offline_disabled_reason;
1748 
1749 	psi->port_states.portphysstate_portstate =
1750 		(hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf);
1751 	psi->link_width_downgrade_tx_active =
1752 		cpu_to_be16(ppd->link_width_downgrade_tx_active);
1753 	psi->link_width_downgrade_rx_active =
1754 		cpu_to_be16(ppd->link_width_downgrade_rx_active);
1755 	if (resp_len)
1756 		*resp_len += sizeof(struct opa_port_state_info);
1757 
1758 	return reply((struct ib_mad_hdr *)smp);
1759 }
1760 
1761 static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
1762 			      struct ib_device *ibdev, u8 port,
1763 			      u32 *resp_len)
1764 {
1765 	u32 nports = OPA_AM_NPORT(am);
1766 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1767 	u32 ls_old;
1768 	u8 ls_new, ps_new;
1769 	struct hfi1_ibport *ibp;
1770 	struct hfi1_pportdata *ppd;
1771 	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
1772 	int ret, invalid = 0;
1773 
1774 	if (nports != 1) {
1775 		smp->status |= IB_SMP_INVALID_FIELD;
1776 		return reply((struct ib_mad_hdr *)smp);
1777 	}
1778 
1779 	ibp = to_iport(ibdev, port);
1780 	ppd = ppd_from_ibp(ibp);
1781 
1782 	ls_old = driver_lstate(ppd);
1783 
1784 	ls_new = port_states_to_logical_state(&psi->port_states);
1785 	ps_new = port_states_to_phys_state(&psi->port_states);
1786 
1787 	if (ls_old == IB_PORT_INIT) {
1788 		if (start_of_sm_config) {
1789 			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1790 				ppd->is_sm_config_started = 1;
1791 		} else if (ls_new == IB_PORT_ARMED) {
1792 			if (ppd->is_sm_config_started == 0)
1793 				invalid = 1;
1794 		}
1795 	}
1796 
1797 	ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
1798 	if (ret)
1799 		return ret;
1800 
1801 	if (invalid)
1802 		smp->status |= IB_SMP_INVALID_FIELD;
1803 
1804 	return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len);
1805 }
1806 
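/*
 * SubnGet(CableInfo): only supported on QSFP ports.  The requested
 * address must be below 4096 and the [addr, addr + len - 1] range may
 * not cross a 128-byte page; reads past the memory actually present
 * on the cable return zeroed data rather than an error.
 */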
1807 static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
1808 				     struct ib_device *ibdev, u8 port,
1809 				     u32 *resp_len)
1810 {
1811 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1812 	u32 addr = OPA_AM_CI_ADDR(am);
1813 	u32 len = OPA_AM_CI_LEN(am) + 1;
1814 	int ret;
1815 
1816 	if (dd->pport->port_type != PORT_TYPE_QSFP) {
1817 		smp->status |= IB_SMP_INVALID_FIELD;
1818 		return reply((struct ib_mad_hdr *)smp);
1819 	}
1820 
1821 #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
1822 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
1823 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
1824 
1825 	/*
1826 	 * check that addr is within spec, and
1827 	 * addr and (addr + len - 1) are on the same "page"
1828 	 */
1829 	if (addr >= 4096 ||
1830 	    (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
1831 		smp->status |= IB_SMP_INVALID_FIELD;
1832 		return reply((struct ib_mad_hdr *)smp);
1833 	}
1834 
1835 	ret = get_cable_info(dd, port, addr, len, data);
1836 
1837 	if (ret == -ENODEV) {
1838 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
1839 		return reply((struct ib_mad_hdr *)smp);
1840 	}
1841 
1842 	/* The address range for the CableInfo SMA query is wider than the
1843 	 * memory available on the QSFP cable. We want to return a valid
1844 	 * response, albeit zeroed out, for address ranges beyond available
1845 	 * memory but that are within the CableInfo query spec
1846 	 */
1847 	if (ret < 0 && ret != -ERANGE) {
1848 		smp->status |= IB_SMP_INVALID_FIELD;
1849 		return reply((struct ib_mad_hdr *)smp);
1850 	}
1851 
1852 	if (resp_len)
1853 		*resp_len += len;
1854 
1855 	return reply((struct ib_mad_hdr *)smp);
1856 }
1857 
1858 static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
1859 			      struct ib_device *ibdev, u8 port, u32 *resp_len)
1860 {
1861 	u32 num_ports = OPA_AM_NPORT(am);
1862 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1863 	struct hfi1_pportdata *ppd;
1864 	struct buffer_control *p = (struct buffer_control *)data;
1865 	int size;
1866 
1867 	if (num_ports != 1) {
1868 		smp->status |= IB_SMP_INVALID_FIELD;
1869 		return reply((struct ib_mad_hdr *)smp);
1870 	}
1871 
1872 	ppd = dd->pport + (port - 1);
1873 	size = fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
1874 	trace_bct_get(dd, p);
1875 	if (resp_len)
1876 		*resp_len += size;
1877 
1878 	return reply((struct ib_mad_hdr *)smp);
1879 }
1880 
1881 static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
1882 			      struct ib_device *ibdev, u8 port, u32 *resp_len)
1883 {
1884 	u32 num_ports = OPA_AM_NPORT(am);
1885 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1886 	struct hfi1_pportdata *ppd;
1887 	struct buffer_control *p = (struct buffer_control *)data;
1888 
1889 	if (num_ports != 1) {
1890 		smp->status |= IB_SMP_INVALID_FIELD;
1891 		return reply((struct ib_mad_hdr *)smp);
1892 	}
1893 	ppd = dd->pport + (port - 1);
1894 	trace_bct_set(dd, p);
1895 	if (fm_set_table(ppd, FM_TBL_BUFFER_CONTROL, p) < 0) {
1896 		smp->status |= IB_SMP_INVALID_FIELD;
1897 		return reply((struct ib_mad_hdr *)smp);
1898 	}
1899 
1900 	return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len);
1901 }
1902 
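/*
 * SubnGet(VLArbitration): the table section is encoded in bits 23:16
 * of the attribute modifier; the low/high elements and the preemption
 * tables are all fetched through the FM table layer.
 */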
1903 static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
1904 				 struct ib_device *ibdev, u8 port,
1905 				 u32 *resp_len)
1906 {
1907 	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
1908 	u32 num_ports = OPA_AM_NPORT(am);
1909 	u8 section = (am & 0x00ff0000) >> 16;
1910 	u8 *p = data;
1911 	int size = 0;
1912 
1913 	if (num_ports != 1) {
1914 		smp->status |= IB_SMP_INVALID_FIELD;
1915 		return reply((struct ib_mad_hdr *)smp);
1916 	}
1917 
1918 	switch (section) {
1919 	case OPA_VLARB_LOW_ELEMENTS:
1920 		size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
1921 		break;
1922 	case OPA_VLARB_HIGH_ELEMENTS:
1923 		size = fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
1924 		break;
1925 	case OPA_VLARB_PREEMPT_ELEMENTS:
1926 		size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
1927 		break;
1928 	case OPA_VLARB_PREEMPT_MATRIX:
1929 		size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
1930 		break;
1931 	default:
1932 		pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
1933 			be32_to_cpu(smp->attr_mod));
1934 		smp->status |= IB_SMP_INVALID_FIELD;
1935 		break;
1936 	}
1937 
1938 	if (size > 0 && resp_len)
1939 		*resp_len += size;
1940 
1941 	return reply((struct ib_mad_hdr *)smp);
1942 }
1943 
1944 static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
1945 				 struct ib_device *ibdev, u8 port,
1946 				 u32 *resp_len)
1947 {
1948 	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
1949 	u32 num_ports = OPA_AM_NPORT(am);
1950 	u8 section = (am & 0x00ff0000) >> 16;
1951 	u8 *p = data;
1952 
1953 	if (num_ports != 1) {
1954 		smp->status |= IB_SMP_INVALID_FIELD;
1955 		return reply((struct ib_mad_hdr *)smp);
1956 	}
1957 
1958 	switch (section) {
1959 	case OPA_VLARB_LOW_ELEMENTS:
1960 		(void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
1961 		break;
1962 	case OPA_VLARB_HIGH_ELEMENTS:
1963 		(void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
1964 		break;
1965 	/*
1966 	 * neither OPA_VLARB_PREEMPT_ELEMENTS nor OPA_VLARB_PREEMPT_MATRIX
1967 	 * can be changed from their default values
1968 	 */
1969 	case OPA_VLARB_PREEMPT_ELEMENTS:
1970 		/* FALLTHROUGH */
1971 	case OPA_VLARB_PREEMPT_MATRIX:
1972 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
1973 		break;
1974 	default:
1975 		pr_warn("OPA SubnSet(VL Arb) AM Invalid : 0x%x\n",
1976 			be32_to_cpu(smp->attr_mod));
1977 		smp->status |= IB_SMP_INVALID_FIELD;
1978 		break;
1979 	}
1980 
1981 	return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len);
1982 }
1983 
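/*
 * OPA PMA MADs: a 24-byte common MAD header followed by 2024 bytes of
 * class data, 2048 bytes in total.
 */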
1984 struct opa_pma_mad {
1985 	struct ib_mad_hdr mad_hdr;
1986 	u8 data[2024];
1987 } __packed;
1988 
1989 struct opa_class_port_info {
1990 	u8 base_version;
1991 	u8 class_version;
1992 	__be16 cap_mask;
1993 	__be32 cap_mask2_resp_time;
1994 
1995 	u8 redirect_gid[16];
1996 	__be32 redirect_tc_fl;
1997 	__be32 redirect_lid;
1998 	__be32 redirect_sl_qp;
1999 	__be32 redirect_qkey;
2000 
2001 	u8 trap_gid[16];
2002 	__be32 trap_tc_fl;
2003 	__be32 trap_lid;
2004 	__be32 trap_hl_qp;
2005 	__be32 trap_qkey;
2006 
2007 	__be16 trap_pkey;
2008 	__be16 redirect_pkey;
2009 
2010 	u8 trap_sl_rsvd;
2011 	u8 reserved[3];
2012 } __packed;
2013 
2014 struct opa_port_status_req {
2015 	__u8 port_num;
2016 	__u8 reserved[3];
2017 	__be32 vl_select_mask;
2018 };
2019 
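/* all data VLs (0-7) plus VL15 */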
2020 #define VL_MASK_ALL		0x000080ff
2021 
2022 struct opa_port_status_rsp {
2023 	__u8 port_num;
2024 	__u8 reserved[3];
2025 	__be32  vl_select_mask;
2026 
2027 	/* Data counters */
2028 	__be64 port_xmit_data;
2029 	__be64 port_rcv_data;
2030 	__be64 port_xmit_pkts;
2031 	__be64 port_rcv_pkts;
2032 	__be64 port_multicast_xmit_pkts;
2033 	__be64 port_multicast_rcv_pkts;
2034 	__be64 port_xmit_wait;
2035 	__be64 sw_port_congestion;
2036 	__be64 port_rcv_fecn;
2037 	__be64 port_rcv_becn;
2038 	__be64 port_xmit_time_cong;
2039 	__be64 port_xmit_wasted_bw;
2040 	__be64 port_xmit_wait_data;
2041 	__be64 port_rcv_bubble;
2042 	__be64 port_mark_fecn;
2043 	/* Error counters */
2044 	__be64 port_rcv_constraint_errors;
2045 	__be64 port_rcv_switch_relay_errors;
2046 	__be64 port_xmit_discards;
2047 	__be64 port_xmit_constraint_errors;
2048 	__be64 port_rcv_remote_physical_errors;
2049 	__be64 local_link_integrity_errors;
2050 	__be64 port_rcv_errors;
2051 	__be64 excessive_buffer_overruns;
2052 	__be64 fm_config_errors;
2053 	__be32 link_error_recovery;
2054 	__be32 link_downed;
2055 	u8 uncorrectable_errors;
2056 
2057 	u8 link_quality_indicator; /* 5res, 3bit */
2058 	u8 res2[6];
2059 	struct _vls_pctrs {
2060 		/* per-VL Data counters */
2061 		__be64 port_vl_xmit_data;
2062 		__be64 port_vl_rcv_data;
2063 		__be64 port_vl_xmit_pkts;
2064 		__be64 port_vl_rcv_pkts;
2065 		__be64 port_vl_xmit_wait;
2066 		__be64 sw_port_vl_congestion;
2067 		__be64 port_vl_rcv_fecn;
2068 		__be64 port_vl_rcv_becn;
2069 		__be64 port_xmit_time_cong;
2070 		__be64 port_vl_xmit_wasted_bw;
2071 		__be64 port_vl_xmit_wait_data;
2072 		__be64 port_vl_rcv_bubble;
2073 		__be64 port_vl_mark_fecn;
2074 		__be64 port_vl_xmit_discards;
2075 	} vls[0]; /* real array size defined by # bits set in vl_select_mask */
2076 };
2077 
2078 enum counter_selects {
2079 	CS_PORT_XMIT_DATA			= (1 << 31),
2080 	CS_PORT_RCV_DATA			= (1 << 30),
2081 	CS_PORT_XMIT_PKTS			= (1 << 29),
2082 	CS_PORT_RCV_PKTS			= (1 << 28),
2083 	CS_PORT_MCAST_XMIT_PKTS			= (1 << 27),
2084 	CS_PORT_MCAST_RCV_PKTS			= (1 << 26),
2085 	CS_PORT_XMIT_WAIT			= (1 << 25),
2086 	CS_SW_PORT_CONGESTION			= (1 << 24),
2087 	CS_PORT_RCV_FECN			= (1 << 23),
2088 	CS_PORT_RCV_BECN			= (1 << 22),
2089 	CS_PORT_XMIT_TIME_CONG			= (1 << 21),
2090 	CS_PORT_XMIT_WASTED_BW			= (1 << 20),
2091 	CS_PORT_XMIT_WAIT_DATA			= (1 << 19),
2092 	CS_PORT_RCV_BUBBLE			= (1 << 18),
2093 	CS_PORT_MARK_FECN			= (1 << 17),
2094 	CS_PORT_RCV_CONSTRAINT_ERRORS		= (1 << 16),
2095 	CS_PORT_RCV_SWITCH_RELAY_ERRORS		= (1 << 15),
2096 	CS_PORT_XMIT_DISCARDS			= (1 << 14),
2097 	CS_PORT_XMIT_CONSTRAINT_ERRORS		= (1 << 13),
2098 	CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS	= (1 << 12),
2099 	CS_LOCAL_LINK_INTEGRITY_ERRORS		= (1 << 11),
2100 	CS_PORT_RCV_ERRORS			= (1 << 10),
2101 	CS_EXCESSIVE_BUFFER_OVERRUNS		= (1 << 9),
2102 	CS_FM_CONFIG_ERRORS			= (1 << 8),
2103 	CS_LINK_ERROR_RECOVERY			= (1 << 7),
2104 	CS_LINK_DOWNED				= (1 << 6),
2105 	CS_UNCORRECTABLE_ERRORS			= (1 << 5),
2106 };
2107 
2108 struct opa_clear_port_status {
2109 	__be64 port_select_mask[4];
2110 	__be32 counter_select_mask;
2111 };
2112 
2113 struct opa_aggregate {
2114 	__be16 attr_id;
2115 	__be16 err_reqlength;	/* 1 bit, 8 res, 7 bit */
2116 	__be32 attr_mod;
2117 	u8 data[0];
2118 };
2119 
2120 #define MSK_LLI 0x000000f0
2121 #define MSK_LLI_SFT 4
2122 #define MSK_LER 0x0000000f
2123 #define MSK_LER_SFT 0
2124 #define ADD_LLI 8
2125 #define ADD_LER 2
2126 
2127 /* Request contains first three fields, response contains those plus the rest */
2128 struct opa_port_data_counters_msg {
2129 	__be64 port_select_mask[4];
2130 	__be32 vl_select_mask;
2131 	__be32 resolution;
2132 
2133 	/* Response fields follow */
2134 	struct _port_dctrs {
2135 		u8 port_number;
2136 		u8 reserved2[3];
2137 		__be32 link_quality_indicator; /* 29res, 3bit */
2138 
2139 		/* Data counters */
2140 		__be64 port_xmit_data;
2141 		__be64 port_rcv_data;
2142 		__be64 port_xmit_pkts;
2143 		__be64 port_rcv_pkts;
2144 		__be64 port_multicast_xmit_pkts;
2145 		__be64 port_multicast_rcv_pkts;
2146 		__be64 port_xmit_wait;
2147 		__be64 sw_port_congestion;
2148 		__be64 port_rcv_fecn;
2149 		__be64 port_rcv_becn;
2150 		__be64 port_xmit_time_cong;
2151 		__be64 port_xmit_wasted_bw;
2152 		__be64 port_xmit_wait_data;
2153 		__be64 port_rcv_bubble;
2154 		__be64 port_mark_fecn;
2155 
2156 		__be64 port_error_counter_summary;
2157 		/* Sum of error counts/port */
2158 
2159 		struct _vls_dctrs {
2160 			/* per-VL Data counters */
2161 			__be64 port_vl_xmit_data;
2162 			__be64 port_vl_rcv_data;
2163 			__be64 port_vl_xmit_pkts;
2164 			__be64 port_vl_rcv_pkts;
2165 			__be64 port_vl_xmit_wait;
2166 			__be64 sw_port_vl_congestion;
2167 			__be64 port_vl_rcv_fecn;
2168 			__be64 port_vl_rcv_becn;
2169 			__be64 port_xmit_time_cong;
2170 			__be64 port_vl_xmit_wasted_bw;
2171 			__be64 port_vl_xmit_wait_data;
2172 			__be64 port_vl_rcv_bubble;
2173 			__be64 port_vl_mark_fecn;
2174 		} vls[0];
2175 		/* array size defined by #bits set in vl_select_mask */
2176 	} port[1]; /* array size defined by #ports in attribute modifier */
2177 };
2178 
2179 struct opa_port_error_counters64_msg {
2180 	/*
2181 	 * Request contains the first two fields; the response contains
2182 	 * the entire structure
2183 	 */
2184 	__be64 port_select_mask[4];
2185 	__be32 vl_select_mask;
2186 
2187 	/* Response-only fields follow */
2188 	__be32 reserved1;
2189 	struct _port_ectrs {
2190 		u8 port_number;
2191 		u8 reserved2[7];
2192 		__be64 port_rcv_constraint_errors;
2193 		__be64 port_rcv_switch_relay_errors;
2194 		__be64 port_xmit_discards;
2195 		__be64 port_xmit_constraint_errors;
2196 		__be64 port_rcv_remote_physical_errors;
2197 		__be64 local_link_integrity_errors;
2198 		__be64 port_rcv_errors;
2199 		__be64 excessive_buffer_overruns;
2200 		__be64 fm_config_errors;
2201 		__be32 link_error_recovery;
2202 		__be32 link_downed;
2203 		u8 uncorrectable_errors;
2204 		u8 reserved3[7];
2205 		struct _vls_ectrs {
2206 			__be64 port_vl_xmit_discards;
2207 		} vls[0];
2208 		/* array size defined by #bits set in vl_select_mask */
2209 	} port[1]; /* array size defined by #ports in attribute modifier */
2210 };
2211 
2212 struct opa_port_error_info_msg {
2213 	__be64 port_select_mask[4];
2214 	__be32 error_info_select_mask;
2215 	__be32 reserved1;
2216 	struct _port_ei {
2217 		u8 port_number;
2218 		u8 reserved2[7];
2219 
2220 		/* PortRcvErrorInfo */
2221 		struct {
2222 			u8 status_and_code;
2223 			union {
2224 				u8 raw[17];
2225 				struct {
2226 					/* EI1to12 format */
2227 					u8 packet_flit1[8];
2228 					u8 packet_flit2[8];
2229 					u8 remaining_flit_bits12;
2230 				} ei1to12;
2231 				struct {
2232 					u8 packet_bytes[8];
2233 					u8 remaining_flit_bits;
2234 				} ei13;
2235 			} ei;
2236 			u8 reserved3[6];
2237 		} __packed port_rcv_ei;
2238 
2239 		/* ExcessiveBufferOverrunInfo */
2240 		struct {
2241 			u8 status_and_sc;
2242 			u8 reserved4[7];
2243 		} __packed excessive_buffer_overrun_ei;
2244 
2245 		/* PortXmitConstraintErrorInfo */
2246 		struct {
2247 			u8 status;
2248 			u8 reserved5;
2249 			__be16 pkey;
2250 			__be32 slid;
2251 		} __packed port_xmit_constraint_ei;
2252 
2253 		/* PortRcvConstraintErrorInfo */
2254 		struct {
2255 			u8 status;
2256 			u8 reserved6;
2257 			__be16 pkey;
2258 			__be32 slid;
2259 		} __packed port_rcv_constraint_ei;
2260 
2261 		/* PortRcvSwitchRelayErrorInfo */
2262 		struct {
2263 			u8 status_and_code;
2264 			u8 reserved7[3];
2265 			__u32 error_info;
2266 		} __packed port_rcv_switch_relay_ei;
2267 
2268 		/* UncorrectableErrorInfo */
2269 		struct {
2270 			u8 status_and_code;
2271 			u8 reserved8;
2272 		} __packed uncorrectable_ei;
2273 
2274 		/* FMConfigErrorInfo */
2275 		struct {
2276 			u8 status_and_code;
2277 			u8 error_info;
2278 		} __packed fm_config_ei;
2279 		__u32 reserved9;
2280 	} port[1]; /* actual array size defined by #ports in attr modifier */
2281 };
2282 
2283 /* opa_port_error_info_msg error_info_select_mask bit definitions */
2284 enum error_info_selects {
2285 	ES_PORT_RCV_ERROR_INFO			= (1 << 31),
2286 	ES_EXCESSIVE_BUFFER_OVERRUN_INFO	= (1 << 30),
2287 	ES_PORT_XMIT_CONSTRAINT_ERROR_INFO	= (1 << 29),
2288 	ES_PORT_RCV_CONSTRAINT_ERROR_INFO	= (1 << 28),
2289 	ES_PORT_RCV_SWITCH_RELAY_ERROR_INFO	= (1 << 27),
2290 	ES_UNCORRECTABLE_ERROR_INFO		= (1 << 26),
2291 	ES_FM_CONFIG_ERROR_INFO			= (1 << 25)
2292 };
2293 
2294 static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
2295 				     struct ib_device *ibdev, u32 *resp_len)
2296 {
2297 	struct opa_class_port_info *p =
2298 		(struct opa_class_port_info *)pmp->data;
2299 
2300 	memset(pmp->data, 0, sizeof(pmp->data));
2301 
2302 	if (pmp->mad_hdr.attr_mod != 0)
2303 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2304 
2305 	p->base_version = OPA_MGMT_BASE_VERSION;
2306 	p->class_version = OPA_SM_CLASS_VERSION;
2307 	/*
2308 	 * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
2309 	 */
2310 	p->cap_mask2_resp_time = cpu_to_be32(18);
2311 
2312 	if (resp_len)
2313 		*resp_len += sizeof(*p);
2314 
2315 	return reply((struct ib_mad_hdr *)pmp);
2316 }
2317 
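/*
 * A0 hardware workaround: on pre-B step parts, clamp the port-level
 * XmitWait counter so it never exceeds the (saturating) sum of the
 * per-VL XmitWait counters.
 */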
2318 static void a0_portstatus(struct hfi1_pportdata *ppd,
2319 			  struct opa_port_status_rsp *rsp, u32 vl_select_mask)
2320 {
2321 	if (!is_bx(ppd->dd)) {
2322 		unsigned long vl;
2323 		u64 sum_vl_xmit_wait = 0;
2324 		u32 vl_all_mask = VL_MASK_ALL;
2325 
2326 		for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
2327 				 8 * sizeof(vl_all_mask)) {
2328 			u64 tmp = sum_vl_xmit_wait +
2329 				  read_port_cntr(ppd, C_TX_WAIT_VL,
2330 						 idx_from_vl(vl));
2331 			if (tmp < sum_vl_xmit_wait) {
2332 				/* we wrapped */
2333 				sum_vl_xmit_wait = (u64)~0;
2334 				break;
2335 			}
2336 			sum_vl_xmit_wait = tmp;
2337 		}
2338 		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2339 			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2340 	}
2341 }
2342 
2343 static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
2344 				  struct ib_device *ibdev,
2345 				  u8 port, u32 *resp_len)
2346 {
2347 	struct opa_port_status_req *req =
2348 		(struct opa_port_status_req *)pmp->data;
2349 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2350 	struct opa_port_status_rsp *rsp;
2351 	u32 vl_select_mask = be32_to_cpu(req->vl_select_mask);
2352 	unsigned long vl;
2353 	size_t response_data_size;
2354 	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2355 	u8 port_num = req->port_num;
2356 	u8 num_vls = hweight32(vl_select_mask);
2357 	struct _vls_pctrs *vlinfo;
2358 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2359 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2360 	int vfi;
2361 	u64 tmp, tmp2;
2362 
2363 	response_data_size = sizeof(struct opa_port_status_rsp) +
2364 				num_vls * sizeof(struct _vls_pctrs);
2365 	if (response_data_size > sizeof(pmp->data)) {
2366 		pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE;
2367 		return reply((struct ib_mad_hdr *)pmp);
2368 	}
2369 
2370 	if (nports != 1 || (port_num && port_num != port) ||
2371 	    num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
2372 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2373 		return reply((struct ib_mad_hdr *)pmp);
2374 	}
2375 
2376 	memset(pmp->data, 0, sizeof(pmp->data));
2377 
2378 	rsp = (struct opa_port_status_rsp *)pmp->data;
2379 	if (port_num)
2380 		rsp->port_num = port_num;
2381 	else
2382 		rsp->port_num = port;
2383 
2384 	rsp->port_rcv_constraint_errors =
2385 		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2386 					   CNTR_INVALID_VL));
2387 
2388 	hfi1_read_link_quality(dd, &rsp->link_quality_indicator);
2389 
2390 	rsp->vl_select_mask = cpu_to_be32(vl_select_mask);
2391 	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2392 					  CNTR_INVALID_VL));
2393 	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2394 					 CNTR_INVALID_VL));
2395 	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2396 					  CNTR_INVALID_VL));
2397 	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2398 					 CNTR_INVALID_VL));
2399 	rsp->port_multicast_xmit_pkts =
2400 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2401 					  CNTR_INVALID_VL));
2402 	rsp->port_multicast_rcv_pkts =
2403 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2404 					  CNTR_INVALID_VL));
2405 	rsp->port_xmit_wait =
2406 		cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2407 	rsp->port_rcv_fecn =
2408 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2409 	rsp->port_rcv_becn =
2410 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2411 	rsp->port_xmit_discards =
2412 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2413 					   CNTR_INVALID_VL));
2414 	rsp->port_xmit_constraint_errors =
2415 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2416 					   CNTR_INVALID_VL));
2417 	rsp->port_rcv_remote_physical_errors =
2418 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2419 					  CNTR_INVALID_VL));
2420 	rsp->local_link_integrity_errors =
2421 		cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
2422 					  CNTR_INVALID_VL));
2423 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2424 	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2425 				   CNTR_INVALID_VL);
2426 	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
2427 		/* overflow/wrapped */
2428 		rsp->link_error_recovery = cpu_to_be32(~0);
2429 	} else {
2430 		rsp->link_error_recovery = cpu_to_be32(tmp2);
2431 	}
2432 	rsp->port_rcv_errors =
2433 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2434 	rsp->excessive_buffer_overruns =
2435 		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2436 	rsp->fm_config_errors =
2437 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2438 					  CNTR_INVALID_VL));
2439 	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2440 						      CNTR_INVALID_VL));
2441 
2442 	/* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
2443 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2444 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2445 
2446 	vlinfo = &rsp->vls[0];
2447 	vfi = 0;
2448 	/* The vl_select_mask has been checked above, and we know
2449 	 * that it contains only entries which represent valid VLs.
2450 	 * So in the for_each_set_bit() loop below, we don't need
2451 	 * any additional checks for vl.
2452 	 */
2453 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2454 			 8 * sizeof(vl_select_mask)) {
2455 		memset(vlinfo, 0, sizeof(*vlinfo));
2456 
2457 		tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
2458 		rsp->vls[vfi].port_vl_rcv_data = cpu_to_be64(tmp);
2459 
2460 		rsp->vls[vfi].port_vl_rcv_pkts =
2461 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2462 						  idx_from_vl(vl)));
2463 
2464 		rsp->vls[vfi].port_vl_xmit_data =
2465 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2466 						   idx_from_vl(vl)));
2467 
2468 		rsp->vls[vfi].port_vl_xmit_pkts =
2469 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2470 						   idx_from_vl(vl)));
2471 
2472 		rsp->vls[vfi].port_vl_xmit_wait =
2473 			cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2474 						   idx_from_vl(vl)));
2475 
2476 		rsp->vls[vfi].port_vl_rcv_fecn =
2477 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2478 						  idx_from_vl(vl)));
2479 
2480 		rsp->vls[vfi].port_vl_rcv_becn =
2481 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2482 						  idx_from_vl(vl)));
2483 
2484 		rsp->vls[vfi].port_vl_xmit_discards =
2485 			cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
2486 						   idx_from_vl(vl)));
2487 		vlinfo++;
2488 		vfi++;
2489 	}
2490 
2491 	a0_portstatus(ppd, rsp, vl_select_mask);
2492 
2493 	if (resp_len)
2494 		*resp_len += response_data_size;
2495 
2496 	return reply((struct ib_mad_hdr *)pmp);
2497 }
2498 
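/*
 * Build the PortErrorCounterSummary value: sum the individual error
 * counters, right-shifting LocalLinkIntegrity and LinkErrorRecovery
 * by their requested resolutions and saturating the 8-bit
 * uncorrectable error count at 0xff.
 */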
2499 static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
2500 				     u8 res_lli, u8 res_ler)
2501 {
2502 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2503 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2504 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2505 	u64 error_counter_summary = 0, tmp;
2506 
2507 	error_counter_summary += read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2508 						CNTR_INVALID_VL);
2509 	/* port_rcv_switch_relay_errors is 0 for HFIs */
2510 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_DSCD,
2511 						CNTR_INVALID_VL);
2512 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2513 						CNTR_INVALID_VL);
2514 	error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2515 					       CNTR_INVALID_VL);
2516 	/* local link integrity must be right-shifted by the lli resolution */
2517 	error_counter_summary += (read_dev_cntr(dd, C_DC_RX_REPLAY,
2518 						CNTR_INVALID_VL) >> res_lli);
2519 	/* link error recovery must b right-shifted by the ler resolution */
2520 	/* link error recovery must be right-shifted by the ler resolution */
2521 	tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
2522 	error_counter_summary += (tmp >> res_ler);
2523 	error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
2524 					       CNTR_INVALID_VL);
2525 	error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2526 	error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2527 					       CNTR_INVALID_VL);
2528 	/* ppd->link_downed is a 32-bit value */
2529 	error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
2530 						CNTR_INVALID_VL);
2531 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2532 	/* this is an 8-bit quantity */
2533 	error_counter_summary += tmp < 0x100 ? (tmp & 0xff) : 0xff;
2534 
2535 	return error_counter_summary;
2536 }
2537 
2538 static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
2539 			    u32 vl_select_mask)
2540 {
2541 	if (!is_bx(ppd->dd)) {
2542 		unsigned long vl;
2543 		u64 sum_vl_xmit_wait = 0;
2544 		u32 vl_all_mask = VL_MASK_ALL;
2545 
2546 		for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
2547 				 8 * sizeof(vl_all_mask)) {
2548 			u64 tmp = sum_vl_xmit_wait +
2549 				  read_port_cntr(ppd, C_TX_WAIT_VL,
2550 						 idx_from_vl(vl));
2551 			if (tmp < sum_vl_xmit_wait) {
2552 				/* we wrapped */
2553 				sum_vl_xmit_wait = (u64)~0;
2554 				break;
2555 			}
2556 			sum_vl_xmit_wait = tmp;
2557 		}
2558 		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2559 			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2560 	}
2561 }
2562 
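/*
 * Fill in the data counters common to the OPA DataPortCounters
 * response and the IB PortCountersExt response.
 */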
2563 static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
2564 				   struct _port_dctrs *rsp)
2565 {
2566 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2567 
2568 	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2569 						CNTR_INVALID_VL));
2570 	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2571 						CNTR_INVALID_VL));
2572 	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2573 						CNTR_INVALID_VL));
2574 	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2575 						CNTR_INVALID_VL));
2576 	rsp->port_multicast_xmit_pkts =
2577 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2578 					  CNTR_INVALID_VL));
2579 	rsp->port_multicast_rcv_pkts =
2580 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2581 					  CNTR_INVALID_VL));
2582 }
2583 
2584 static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
2585 				    struct ib_device *ibdev,
2586 				    u8 port, u32 *resp_len)
2587 {
2588 	struct opa_port_data_counters_msg *req =
2589 		(struct opa_port_data_counters_msg *)pmp->data;
2590 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2591 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2592 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2593 	struct _port_dctrs *rsp;
2594 	struct _vls_dctrs *vlinfo;
2595 	size_t response_data_size;
2596 	u32 num_ports;
2597 	u8 num_pslm;
2598 	u8 lq, num_vls;
2599 	u8 res_lli, res_ler;
2600 	u64 port_mask;
2601 	u8 port_num;
2602 	unsigned long vl;
2603 	u32 vl_select_mask;
2604 	int vfi;
2605 
2606 	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2607 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2608 	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2609 	vl_select_mask = be32_to_cpu(req->vl_select_mask);
2610 	res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT;
2611 	res_lli = res_lli ? res_lli + ADD_LLI : 0;
2612 	res_ler = (u8)(be32_to_cpu(req->resolution) & MSK_LER) >> MSK_LER_SFT;
2613 	res_ler = res_ler ? res_ler + ADD_LER : 0;
2614 
2615 	if (num_ports != 1 || (vl_select_mask & ~VL_MASK_ALL)) {
2616 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2617 		return reply((struct ib_mad_hdr *)pmp);
2618 	}
2619 
2620 	/* Sanity check */
2621 	response_data_size = sizeof(struct opa_port_data_counters_msg) +
2622 				num_vls * sizeof(struct _vls_dctrs);
2623 
2624 	if (response_data_size > sizeof(pmp->data)) {
2625 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2626 		return reply((struct ib_mad_hdr *)pmp);
2627 	}
2628 
2629 	/*
2630 	 * The bit set in the mask needs to be consistent with the
2631 	 * port the request came in on.
2632 	 */
2633 	port_mask = be64_to_cpu(req->port_select_mask[3]);
2634 	port_num = find_first_bit((unsigned long *)&port_mask,
2635 				  sizeof(port_mask) * 8);
2636 
2637 	if (port_num != port) {
2638 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2639 		return reply((struct ib_mad_hdr *)pmp);
2640 	}
2641 
2642 	rsp = &req->port[0];
2643 	memset(rsp, 0, sizeof(*rsp));
2644 
2645 	rsp->port_number = port;
2646 	/*
2647 	 * Note that link_quality_indicator is a 32 bit quantity in
2648 	 * 'datacounters' queries (as opposed to 'portinfo' queries,
2649 	 * where it's a byte).
2650 	 */
2651 	hfi1_read_link_quality(dd, &lq);
2652 	rsp->link_quality_indicator = cpu_to_be32((u32)lq);
2653 	pma_get_opa_port_dctrs(ibdev, rsp);
2654 
2655 	rsp->port_xmit_wait =
2656 		cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2657 	rsp->port_rcv_fecn =
2658 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2659 	rsp->port_rcv_becn =
2660 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2661 	rsp->port_error_counter_summary =
2662 		cpu_to_be64(get_error_counter_summary(ibdev, port,
2663 						      res_lli, res_ler));
2664 
2665 	vlinfo = &rsp->vls[0];
2666 	vfi = 0;
2667 	/* The vl_select_mask has been checked above, and we know
2668 	 * that it contains only entries which represent valid VLs.
2669 	 * So in the for_each_set_bit() loop below, we don't need
2670 	 * any additional checks for vl.
2671 	 */
2672 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2673 			 8 * sizeof(req->vl_select_mask)) {
2674 		memset(vlinfo, 0, sizeof(*vlinfo));
2675 
2676 		rsp->vls[vfi].port_vl_xmit_data =
2677 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2678 						   idx_from_vl(vl)));
2679 
2680 		rsp->vls[vfi].port_vl_rcv_data =
2681 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
2682 						  idx_from_vl(vl)));
2683 
2684 		rsp->vls[vfi].port_vl_xmit_pkts =
2685 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2686 						   idx_from_vl(vl)));
2687 
2688 		rsp->vls[vfi].port_vl_rcv_pkts =
2689 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2690 						  idx_from_vl(vl)));
2691 
2692 		rsp->vls[vfi].port_vl_xmit_wait =
2693 			cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2694 						   idx_from_vl(vl)));
2695 
2696 		rsp->vls[vfi].port_vl_rcv_fecn =
2697 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2698 						  idx_from_vl(vl)));
2699 		rsp->vls[vfi].port_vl_rcv_becn =
2700 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2701 						  idx_from_vl(vl)));
2702 
2703 		/* rsp->port_vl_xmit_time_cong is 0 for HFIs */
2704 		/* rsp->port_vl_xmit_wasted_bw ??? */
2705 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
2706 		 * does this differ from rsp->vls[vfi].port_vl_xmit_wait
2707 		 * does this differ from rsp->vls[vfi].port_vl_xmit_wait?
2708 		/*rsp->vls[vfi].port_vl_mark_fecn =
2709 		 *	cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
2710 		 *		+ offset));
2711 		 */
2712 		vlinfo++;
2713 		vfi++;
2714 	}
2715 
2716 	a0_datacounters(ppd, rsp, vl_select_mask);
2717 
2718 	if (resp_len)
2719 		*resp_len += response_data_size;
2720 
2721 	return reply((struct ib_mad_hdr *)pmp);
2722 }
2723 
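/*
 * Classic IB PortCountersExt query: reuse the OPA data counter helper
 * and map its fields onto the IB response; unicast packet counts are
 * reported as zero.
 */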
2724 static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
2725 				       struct ib_device *ibdev, u8 port)
2726 {
2727 	struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
2728 						pmp->data;
2729 	struct _port_dctrs rsp;
2730 
2731 	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
2732 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2733 		goto bail;
2734 	}
2735 
2736 	memset(&rsp, 0, sizeof(rsp));
2737 	pma_get_opa_port_dctrs(ibdev, &rsp);
2738 
2739 	p->port_xmit_data = rsp.port_xmit_data;
2740 	p->port_rcv_data = rsp.port_rcv_data;
2741 	p->port_xmit_packets = rsp.port_xmit_pkts;
2742 	p->port_rcv_packets = rsp.port_rcv_pkts;
2743 	p->port_unicast_xmit_packets = 0;
2744 	p->port_unicast_rcv_packets =  0;
2745 	p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
2746 	p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
2747 
2748 bail:
2749 	return reply((struct ib_mad_hdr *)pmp);
2750 }
2751 
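/*
 * Fill in the error counters common to the OPA ErrorPortCounters
 * response and the IB PortCounters response.  LinkErrorRecovery is
 * the sum of two device counters and saturates at 32 bits.
 */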
2752 static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
2753 				   struct _port_ectrs *rsp, u8 port)
2754 {
2755 	u64 tmp, tmp2;
2756 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2757 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2758 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2759 
2760 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2761 	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2762 					CNTR_INVALID_VL);
2763 	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
2764 		/* overflow/wrapped */
2765 		rsp->link_error_recovery = cpu_to_be32(~0);
2766 	} else {
2767 		rsp->link_error_recovery = cpu_to_be32(tmp2);
2768 	}
2769 
2770 	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2771 						CNTR_INVALID_VL));
2772 	rsp->port_rcv_errors =
2773 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2774 	rsp->port_rcv_remote_physical_errors =
2775 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2776 					  CNTR_INVALID_VL));
2777 	rsp->port_rcv_switch_relay_errors = 0;
2778 	rsp->port_xmit_discards =
2779 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2780 					   CNTR_INVALID_VL));
2781 	rsp->port_xmit_constraint_errors =
2782 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2783 					   CNTR_INVALID_VL));
2784 	rsp->port_rcv_constraint_errors =
2785 		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2786 					   CNTR_INVALID_VL));
2787 	rsp->local_link_integrity_errors =
2788 		cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
2789 					  CNTR_INVALID_VL));
2790 	rsp->excessive_buffer_overruns =
2791 		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2792 }
2793 
2794 static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
2795 				  struct ib_device *ibdev,
2796 				  u8 port, u32 *resp_len)
2797 {
2798 	size_t response_data_size;
2799 	struct _port_ectrs *rsp;
2800 	u8 port_num;
2801 	struct opa_port_error_counters64_msg *req;
2802 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2803 	u32 num_ports;
2804 	u8 num_pslm;
2805 	u8 num_vls;
2806 	struct hfi1_ibport *ibp;
2807 	struct hfi1_pportdata *ppd;
2808 	struct _vls_ectrs *vlinfo;
2809 	unsigned long vl;
2810 	u64 port_mask, tmp;
2811 	u32 vl_select_mask;
2812 	int vfi;
2813 
2814 	req = (struct opa_port_error_counters64_msg *)pmp->data;
2815 
2816 	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2817 
2818 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2819 	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2820 
2821 	if (num_ports != 1 || num_ports != num_pslm) {
2822 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2823 		return reply((struct ib_mad_hdr *)pmp);
2824 	}
2825 
2826 	response_data_size = sizeof(struct opa_port_error_counters64_msg) +
2827 				num_vls * sizeof(struct _vls_ectrs);
2828 
2829 	if (response_data_size > sizeof(pmp->data)) {
2830 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2831 		return reply((struct ib_mad_hdr *)pmp);
2832 	}
2833 	/*
2834 	 * The bit set in the mask needs to be consistent with the
2835 	 * port the request came in on.
2836 	 */
2837 	port_mask = be64_to_cpu(req->port_select_mask[3]);
2838 	port_num = find_first_bit((unsigned long *)&port_mask,
2839 				  sizeof(port_mask) * 8);
2840 
2841 	if (port_num != port) {
2842 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2843 		return reply((struct ib_mad_hdr *)pmp);
2844 	}
2845 
2846 	rsp = &req->port[0];
2847 
2848 	ibp = to_iport(ibdev, port_num);
2849 	ppd = ppd_from_ibp(ibp);
2850 
2851 	memset(rsp, 0, sizeof(*rsp));
2852 	rsp->port_number = port_num;
2853 
2854 	pma_get_opa_port_ectrs(ibdev, rsp, port_num);
2855 
2856 	rsp->port_rcv_remote_physical_errors =
2857 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2858 					  CNTR_INVALID_VL));
2859 	rsp->fm_config_errors =
2860 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2861 					  CNTR_INVALID_VL));
2862 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2863 
2864 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2865 	rsp->port_rcv_errors =
2866 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2867 	vlinfo = &rsp->vls[0];
2868 	vfi = 0;
2869 	vl_select_mask = be32_to_cpu(req->vl_select_mask);
2870 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2871 			 8 * sizeof(req->vl_select_mask)) {
2872 		memset(vlinfo, 0, sizeof(*vlinfo));
2873 		rsp->vls[vfi].port_vl_xmit_discards =
2874 			cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
2875 						   idx_from_vl(vl)));
2876 		vlinfo += 1;
2877 		vfi++;
2878 	}
2879 
2880 	if (resp_len)
2881 		*resp_len += response_data_size;
2882 
2883 	return reply((struct ib_mad_hdr *)pmp);
2884 }
2885 
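/*
 * Classic IB PortCounters query: gather the 64-bit OPA error counters
 * and saturate them down to the narrower IB PMA fields.
 */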
2886 static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
2887 				   struct ib_device *ibdev, u8 port)
2888 {
2889 	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
2890 		pmp->data;
2891 	struct _port_ectrs rsp;
2892 	u64 temp_link_overrun_errors;
2893 	u64 temp_64;
2894 	u32 temp_32;
2895 
2896 	memset(&rsp, 0, sizeof(rsp));
2897 	pma_get_opa_port_ectrs(ibdev, &rsp, port);
2898 
2899 	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
2900 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2901 		goto bail;
2902 	}
2903 
2904 	p->symbol_error_counter = 0; /* N/A for OPA */
2905 
2906 	temp_32 = be32_to_cpu(rsp.link_error_recovery);
2907 	if (temp_32 > 0xFFUL)
2908 		p->link_error_recovery_counter = 0xFF;
2909 	else
2910 		p->link_error_recovery_counter = (u8)temp_32;
2911 
2912 	temp_32 = be32_to_cpu(rsp.link_downed);
2913 	if (temp_32 > 0xFFUL)
2914 		p->link_downed_counter = 0xFF;
2915 	else
2916 		p->link_downed_counter = (u8)temp_32;
2917 
2918 	temp_64 = be64_to_cpu(rsp.port_rcv_errors);
2919 	if (temp_64 > 0xFFFFUL)
2920 		p->port_rcv_errors = cpu_to_be16(0xFFFF);
2921 	else
2922 		p->port_rcv_errors = cpu_to_be16((u16)temp_64);
2923 
2924 	temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
2925 	if (temp_64 > 0xFFFFUL)
2926 		p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
2927 	else
2928 		p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
2929 
2930 	temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
2931 	p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
2932 
2933 	temp_64 = be64_to_cpu(rsp.port_xmit_discards);
2934 	if (temp_64 > 0xFFFFUL)
2935 		p->port_xmit_discards = cpu_to_be16(0xFFFF);
2936 	else
2937 		p->port_xmit_discards = cpu_to_be16((u16)temp_64);
2938 
2939 	temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
2940 	if (temp_64 > 0xFFUL)
2941 		p->port_xmit_constraint_errors = 0xFF;
2942 	else
2943 		p->port_xmit_constraint_errors = (u8)temp_64;
2944 
2945 	temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
2946 	if (temp_64 > 0xFFUL)
2947 		p->port_rcv_constraint_errors = 0xFF;
2948 	else
2949 		p->port_rcv_constraint_errors = (u8)temp_64;
2950 
2951 	/* LocalLink: 7:4, BufferOverrun: 3:0 */
2952 	temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
2953 	if (temp_64 > 0xFUL)
2954 		temp_64 = 0xFUL;
2955 
2956 	temp_link_overrun_errors = temp_64 << 4;
2957 
2958 	temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
2959 	if (temp_64 > 0xFUL)
2960 		temp_64 = 0xFUL;
2961 	temp_link_overrun_errors |= temp_64;
2962 
2963 	p->link_overrun_errors = (u8)temp_link_overrun_errors;
2964 
2965 	p->vl15_dropped = 0; /* N/A for OPA */
2966 
2967 bail:
2968 	return reply((struct ib_mad_hdr *)pmp);
2969 }
2970 
2971 static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
2972 				 struct ib_device *ibdev,
2973 				 u8 port, u32 *resp_len)
2974 {
2975 	size_t response_data_size;
2976 	struct _port_ei *rsp;
2977 	struct opa_port_error_info_msg *req;
2978 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2979 	u64 port_mask;
2980 	u32 num_ports;
2981 	u8 port_num;
2982 	u8 num_pslm;
2983 	u64 reg;
2984 
2985 	req = (struct opa_port_error_info_msg *)pmp->data;
2986 	rsp = &req->port[0];
2987 
2988 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
2989 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2990 
2991 	memset(rsp, 0, sizeof(*rsp));
2992 
2993 	if (num_ports != 1 || num_ports != num_pslm) {
2994 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2995 		return reply((struct ib_mad_hdr *)pmp);
2996 	}
2997 
2998 	/* Sanity check */
2999 	response_data_size = sizeof(struct opa_port_error_info_msg);
3000 
3001 	if (response_data_size > sizeof(pmp->data)) {
3002 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3003 		return reply((struct ib_mad_hdr *)pmp);
3004 	}
3005 
3006 	/*
3007 	 * The bit set in the mask needs to be consistent with the port
3008 	 * the request came in on.
3009 	 */
3010 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3011 	port_num = find_first_bit((unsigned long *)&port_mask,
3012 				  sizeof(port_mask) * 8);
3013 
3014 	if (port_num != port) {
3015 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3016 		return reply((struct ib_mad_hdr *)pmp);
3017 	}
3018 
3019 	/* PortRcvErrorInfo */
3020 	rsp->port_rcv_ei.status_and_code =
3021 		dd->err_info_rcvport.status_and_code;
3022 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
3023 	       &dd->err_info_rcvport.packet_flit1, sizeof(u64));
3024 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
3025 	       &dd->err_info_rcvport.packet_flit2, sizeof(u64));
3026 
3027 	/* ExcessiveBufferOverrunInfo */
3028 	reg = read_csr(dd, RCV_ERR_INFO);
3029 	if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
3030 		/*
3031 		 * if the RcvExcessBufferOverrun bit is set, save SC of
3032 		 * first pkt that encountered an excess buffer overrun
3033 		 */
3034 		u8 tmp = (u8)reg;
3035 
3036 		tmp &= RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
3037 		tmp <<= 2;
3038 		rsp->excessive_buffer_overrun_ei.status_and_sc = tmp;
3039 		/* set the status bit */
3040 		rsp->excessive_buffer_overrun_ei.status_and_sc |= 0x80;
3041 	}
3042 
3043 	rsp->port_xmit_constraint_ei.status =
3044 		dd->err_info_xmit_constraint.status;
3045 	rsp->port_xmit_constraint_ei.pkey =
3046 		cpu_to_be16(dd->err_info_xmit_constraint.pkey);
3047 	rsp->port_xmit_constraint_ei.slid =
3048 		cpu_to_be32(dd->err_info_xmit_constraint.slid);
3049 
3050 	rsp->port_rcv_constraint_ei.status =
3051 		dd->err_info_rcv_constraint.status;
3052 	rsp->port_rcv_constraint_ei.pkey =
3053 		cpu_to_be16(dd->err_info_rcv_constraint.pkey);
3054 	rsp->port_rcv_constraint_ei.slid =
3055 		cpu_to_be32(dd->err_info_rcv_constraint.slid);
3056 
3057 	/* UncorrectableErrorInfo */
3058 	rsp->uncorrectable_ei.status_and_code = dd->err_info_uncorrectable;
3059 
3060 	/* FMConfigErrorInfo */
3061 	rsp->fm_config_ei.status_and_code = dd->err_info_fmconfig;
3062 
3063 	if (resp_len)
3064 		*resp_len += response_data_size;
3065 
3066 	return reply((struct ib_mad_hdr *)pmp);
3067 }
3068 
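/*
 * ClearPortStatus: zero the counters selected in counter_select_mask.
 * Per-VL counters are cleared for every VL, regardless of which VLs
 * the requester selected.
 */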
3069 static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
3070 				  struct ib_device *ibdev,
3071 				  u8 port, u32 *resp_len)
3072 {
3073 	struct opa_clear_port_status *req =
3074 		(struct opa_clear_port_status *)pmp->data;
3075 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3076 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3077 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3078 	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
3079 	u64 portn = be64_to_cpu(req->port_select_mask[3]);
3080 	u32 counter_select = be32_to_cpu(req->counter_select_mask);
3081 	u32 vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
3082 	unsigned long vl;
3083 
3084 	if ((nports != 1) || (portn != 1 << port)) {
3085 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3086 		return reply((struct ib_mad_hdr *)pmp);
3087 	}
3088 	/*
3089 	 * only counters returned by pma_get_opa_portstatus() are
3090 	 * handled, so when pma_get_opa_portstatus() gets a fix,
3091 	 * the corresponding change should be made here as well.
3092 	 */
3093 
3094 	if (counter_select & CS_PORT_XMIT_DATA)
3095 		write_dev_cntr(dd, C_DC_XMIT_FLITS, CNTR_INVALID_VL, 0);
3096 
3097 	if (counter_select & CS_PORT_RCV_DATA)
3098 		write_dev_cntr(dd, C_DC_RCV_FLITS, CNTR_INVALID_VL, 0);
3099 
3100 	if (counter_select & CS_PORT_XMIT_PKTS)
3101 		write_dev_cntr(dd, C_DC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3102 
3103 	if (counter_select & CS_PORT_RCV_PKTS)
3104 		write_dev_cntr(dd, C_DC_RCV_PKTS, CNTR_INVALID_VL, 0);
3105 
3106 	if (counter_select & CS_PORT_MCAST_XMIT_PKTS)
3107 		write_dev_cntr(dd, C_DC_MC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3108 
3109 	if (counter_select & CS_PORT_MCAST_RCV_PKTS)
3110 		write_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL, 0);
3111 
3112 	if (counter_select & CS_PORT_XMIT_WAIT)
3113 		write_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL, 0);
3114 
3115 	/* ignore cs_sw_portCongestion for HFIs */
3116 
3117 	if (counter_select & CS_PORT_RCV_FECN)
3118 		write_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL, 0);
3119 
3120 	if (counter_select & CS_PORT_RCV_BECN)
3121 		write_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL, 0);
3122 
3123 	/* ignore cs_port_xmit_time_cong for HFIs */
3124 	/* ignore cs_port_xmit_wasted_bw for now */
3125 	/* ignore cs_port_xmit_wait_data for now */
3126 	if (counter_select & CS_PORT_RCV_BUBBLE)
3127 		write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
3128 
3129 	/* Only applicable for switch */
3130 	/* if (counter_select & CS_PORT_MARK_FECN)
3131 	 *	write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
3132 	 */
3133 
3134 	if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
3135 		write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
3136 
3137 	/* ignore cs_port_rcv_switch_relay_errors for HFIs */
3138 	if (counter_select & CS_PORT_XMIT_DISCARDS)
3139 		write_port_cntr(ppd, C_SW_XMIT_DSCD, CNTR_INVALID_VL, 0);
3140 
3141 	if (counter_select & CS_PORT_XMIT_CONSTRAINT_ERRORS)
3142 		write_port_cntr(ppd, C_SW_XMIT_CSTR_ERR, CNTR_INVALID_VL, 0);
3143 
3144 	if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS)
3145 		write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0);
3146 
3147 	if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS)
3148 		write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3149 
3150 	if (counter_select & CS_LINK_ERROR_RECOVERY) {
3151 		write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3152 		write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3153 			       CNTR_INVALID_VL, 0);
3154 	}
3155 
3156 	if (counter_select & CS_PORT_RCV_ERRORS)
3157 		write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3158 
3159 	if (counter_select & CS_EXCESSIVE_BUFFER_OVERRUNS) {
3160 		write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3161 		dd->rcv_ovfl_cnt = 0;
3162 	}
3163 
3164 	if (counter_select & CS_FM_CONFIG_ERRORS)
3165 		write_dev_cntr(dd, C_DC_FM_CFG_ERR, CNTR_INVALID_VL, 0);
3166 
3167 	if (counter_select & CS_LINK_DOWNED)
3168 		write_port_cntr(ppd, C_SW_LINK_DOWN, CNTR_INVALID_VL, 0);
3169 
3170 	if (counter_select & CS_UNCORRECTABLE_ERRORS)
3171 		write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
3172 
3173 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
3174 			 8 * sizeof(vl_select_mask)) {
3175 		if (counter_select & CS_PORT_XMIT_DATA)
3176 			write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
3177 
3178 		if (counter_select & CS_PORT_RCV_DATA)
3179 			write_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl), 0);
3180 
3181 		if (counter_select & CS_PORT_XMIT_PKTS)
3182 			write_port_cntr(ppd, C_TX_PKT_VL, idx_from_vl(vl), 0);
3183 
3184 		if (counter_select & CS_PORT_RCV_PKTS)
3185 			write_dev_cntr(dd, C_DC_RX_PKT_VL, idx_from_vl(vl), 0);
3186 
3187 		if (counter_select & CS_PORT_XMIT_WAIT)
3188 			write_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl), 0);
3189 
3190 		/* sw_port_vl_congestion is 0 for HFIs */
3191 		if (counter_select & CS_PORT_RCV_FECN)
3192 			write_dev_cntr(dd, C_DC_RCV_FCN_VL, idx_from_vl(vl), 0);
3193 
3194 		if (counter_select & CS_PORT_RCV_BECN)
3195 			write_dev_cntr(dd, C_DC_RCV_BCN_VL, idx_from_vl(vl), 0);
3196 
3197 		/* port_vl_xmit_time_cong is 0 for HFIs */
3198 		/* port_vl_xmit_wasted_bw ??? */
3199 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ??? */
3200 		if (counter_select & CS_PORT_RCV_BUBBLE)
3201 			write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
3202 
3203 		/* if (counter_select & CS_PORT_MARK_FECN)
3204 		 *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
3205 		 */
3206 		if (counter_select & CS_PORT_XMIT_DISCARDS)
3207 			write_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
3208 					idx_from_vl(vl), 0);
3209 	}
3210 
3211 	if (resp_len)
3212 		*resp_len += sizeof(*req);
3213 
3214 	return reply((struct ib_mad_hdr *)pmp);
3215 }
3216 
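/*
 * Set(ErrorInfo): clear the status bit of each selected error-info
 * record.  The ExcessiveBufferOverrun status lives in the RCV_ERR_INFO
 * CSR and is cleared by writing its overrun bit back to the register.
 */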
3217 static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
3218 				 struct ib_device *ibdev,
3219 				 u8 port, u32 *resp_len)
3220 {
3221 	struct _port_ei *rsp;
3222 	struct opa_port_error_info_msg *req;
3223 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3224 	u64 port_mask;
3225 	u32 num_ports;
3226 	u8 port_num;
3227 	u8 num_pslm;
3228 	u32 error_info_select;
3229 
3230 	req = (struct opa_port_error_info_msg *)pmp->data;
3231 	rsp = &req->port[0];
3232 
3233 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3234 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3235 
3236 	memset(rsp, 0, sizeof(*rsp));
3237 
3238 	if (num_ports != 1 || num_ports != num_pslm) {
3239 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3240 		return reply((struct ib_mad_hdr *)pmp);
3241 	}
3242 
3243 	/*
3244 	 * The bit set in the mask needs to be consistent with the port
3245 	 * the request came in on.
3246 	 */
3247 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3248 	port_num = find_first_bit((unsigned long *)&port_mask,
3249 				  sizeof(port_mask) * 8);
3250 
3251 	if (port_num != port) {
3252 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3253 		return reply((struct ib_mad_hdr *)pmp);
3254 	}
3255 
3256 	error_info_select = be32_to_cpu(req->error_info_select_mask);
3257 
3258 	/* PortRcvErrorInfo */
3259 	if (error_info_select & ES_PORT_RCV_ERROR_INFO)
3260 		/* turn off status bit */
3261 		dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3262 
3263 	/* ExcessiveBufferOverrunInfo */
3264 	if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
3265 		/*
3266 		 * status bit is essentially kept in the h/w - bit 5 of
3267 		 * RCV_ERR_INFO
3268 		 */
3269 		write_csr(dd, RCV_ERR_INFO,
3270 			  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
3271 
3272 	if (error_info_select & ES_PORT_XMIT_CONSTRAINT_ERROR_INFO)
3273 		dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3274 
3275 	if (error_info_select & ES_PORT_RCV_CONSTRAINT_ERROR_INFO)
3276 		dd->err_info_rcv_constraint.status &= ~OPA_EI_STATUS_SMASK;
3277 
3278 	/* UncorrectableErrorInfo */
3279 	if (error_info_select & ES_UNCORRECTABLE_ERROR_INFO)
3280 		/* turn off status bit */
3281 		dd->err_info_uncorrectable &= ~OPA_EI_STATUS_SMASK;
3282 
3283 	/* FMConfigErrorInfo */
3284 	if (error_info_select & ES_FM_CONFIG_ERROR_INFO)
3285 		/* turn off status bit */
3286 		dd->err_info_fmconfig &= ~OPA_EI_STATUS_SMASK;
3287 
3288 	if (resp_len)
3289 		*resp_len += sizeof(*req);
3290 
3291 	return reply((struct ib_mad_hdr *)pmp);
3292 }
3293 
3294 struct opa_congestion_info_attr {
3295 	__be16 congestion_info;
3296 	u8 control_table_cap;	/* Multiple of 64 entry unit CCTs */
3297 	u8 congestion_log_length;
3298 } __packed;
3299 
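/*
 * Get(CongestionInfo): report the congestion control table capacity
 * (in units of 64-entry blocks) and the congestion log length for
 * this port.
 */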
3300 static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
3301 				    struct ib_device *ibdev, u8 port,
3302 				    u32 *resp_len)
3303 {
3304 	struct opa_congestion_info_attr *p =
3305 		(struct opa_congestion_info_attr *)data;
3306 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3307 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3308 
3309 	p->congestion_info = 0;
3310 	p->control_table_cap = ppd->cc_max_table_entries;
3311 	p->congestion_log_length = OPA_CONG_LOG_ELEMS;
3312 
3313 	if (resp_len)
3314 		*resp_len += sizeof(*p);
3315 
3316 	return reply((struct ib_mad_hdr *)smp);
3317 }
3318 
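/*
 * Get(CongestionSetting): report the active per-SL congestion control
 * parameters from the RCU-protected cc_state.
 */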
3319 static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
3320 				       u8 *data, struct ib_device *ibdev,
3321 				       u8 port, u32 *resp_len)
3322 {
3323 	int i;
3324 	struct opa_congestion_setting_attr *p =
3325 		(struct opa_congestion_setting_attr *)data;
3326 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3327 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3328 	struct opa_congestion_setting_entry_shadow *entries;
3329 	struct cc_state *cc_state;
3330 
3331 	rcu_read_lock();
3332 
3333 	cc_state = get_cc_state(ppd);
3334 
3335 	if (!cc_state) {
3336 		rcu_read_unlock();
3337 		return reply((struct ib_mad_hdr *)smp);
3338 	}
3339 
3340 	entries = cc_state->cong_setting.entries;
3341 	p->port_control = cpu_to_be16(cc_state->cong_setting.port_control);
3342 	p->control_map = cpu_to_be32(cc_state->cong_setting.control_map);
3343 	for (i = 0; i < OPA_MAX_SLS; i++) {
3344 		p->entries[i].ccti_increase = entries[i].ccti_increase;
3345 		p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
3346 		p->entries[i].trigger_threshold =
3347 			entries[i].trigger_threshold;
3348 		p->entries[i].ccti_min = entries[i].ccti_min;
3349 	}
3350 
3351 	rcu_read_unlock();
3352 
3353 	if (resp_len)
3354 		*resp_len += sizeof(*p);
3355 
3356 	return reply((struct ib_mad_hdr *)smp);
3357 }
3358 
3359 /*
3360  * Apply congestion control information stored in the ppd to the
3361  * active structure.
3362  */
3363 static void apply_cc_state(struct hfi1_pportdata *ppd)
3364 {
3365 	struct cc_state *old_cc_state, *new_cc_state;
3366 
3367 	new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
3368 	if (!new_cc_state)
3369 		return;
3370 
3371 	/*
3372 	 * Hold the lock for updating *and* to prevent ppd information
3373 	 * from changing during the update.
3374 	 */
3375 	spin_lock(&ppd->cc_state_lock);
3376 
3377 	old_cc_state = get_cc_state_protected(ppd);
3378 	if (!old_cc_state) {
3379 		/* never active, or shutting down */
3380 		spin_unlock(&ppd->cc_state_lock);
3381 		kfree(new_cc_state);
3382 		return;
3383 	}
3384 
3385 	*new_cc_state = *old_cc_state;
3386 
3387 	new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
3388 	memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
3389 	       ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
3390 
3391 	new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
3392 	new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
3393 	memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
3394 	       OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
3395 
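	/*
	 * Publish the new state.  Readers calling get_cc_state() under
	 * rcu_read_lock() see either the old or the new state; the old
	 * state is freed below only after all current readers have finished.
	 */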
3396 	rcu_assign_pointer(ppd->cc_state, new_cc_state);
3397 
3398 	spin_unlock(&ppd->cc_state_lock);
3399 
3400 	kfree_rcu(old_cc_state, rcu);
3401 }
3402 
3403 static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
3404 				       struct ib_device *ibdev, u8 port,
3405 				       u32 *resp_len)
3406 {
3407 	struct opa_congestion_setting_attr *p =
3408 		(struct opa_congestion_setting_attr *)data;
3409 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3410 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3411 	struct opa_congestion_setting_entry_shadow *entries;
3412 	int i;
3413 
3414 	/*
3415 	 * Save details from packet into the ppd.  Hold the cc_state_lock so
3416 	 * our information is consistent with anyone trying to apply the state.
3417 	 */
3418 	spin_lock(&ppd->cc_state_lock);
3419 	ppd->cc_sl_control_map = be32_to_cpu(p->control_map);
3420 
3421 	entries = ppd->congestion_entries;
3422 	for (i = 0; i < OPA_MAX_SLS; i++) {
3423 		entries[i].ccti_increase = p->entries[i].ccti_increase;
3424 		entries[i].ccti_timer = be16_to_cpu(p->entries[i].ccti_timer);
3425 		entries[i].trigger_threshold =
3426 			p->entries[i].trigger_threshold;
3427 		entries[i].ccti_min = p->entries[i].ccti_min;
3428 	}
3429 	spin_unlock(&ppd->cc_state_lock);
3430 
3431 	/* now apply the information */
3432 	apply_cc_state(ppd);
3433 
3434 	return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
3435 					   resp_len);
3436 }
3437 
3438 static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
3439 					u8 *data, struct ib_device *ibdev,
3440 					u8 port, u32 *resp_len)
3441 {
3442 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3443 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3444 	struct opa_hfi1_cong_log *cong_log = (struct opa_hfi1_cong_log *)data;
3445 	s64 ts;
3446 	int i;
3447 
3448 	if (am != 0) {
3449 		smp->status |= IB_SMP_INVALID_FIELD;
3450 		return reply((struct ib_mad_hdr *)smp);
3451 	}
3452 
3453 	spin_lock_irq(&ppd->cc_log_lock);
3454 
3455 	cong_log->log_type = OPA_CC_LOG_TYPE_HFI;
3456 	cong_log->congestion_flags = 0;
3457 	cong_log->threshold_event_counter =
3458 		cpu_to_be16(ppd->threshold_event_counter);
3459 	memcpy(cong_log->threshold_cong_event_map,
3460 	       ppd->threshold_cong_event_map,
3461 	       sizeof(cong_log->threshold_cong_event_map));
3462 	/* keep timestamp in units of 1.024 usec */
3463 	ts = ktime_to_ns(ktime_get()) / 1024;
3464 	cong_log->current_time_stamp = cpu_to_be32(ts);
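	/* walk the entire cc_events ring, wrapping the index at the end */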
3465 	for (i = 0; i < OPA_CONG_LOG_ELEMS; i++) {
3466 		struct opa_hfi1_cong_log_event_internal *cce =
3467 			&ppd->cc_events[ppd->cc_mad_idx++];
3468 		if (ppd->cc_mad_idx == OPA_CONG_LOG_ELEMS)
3469 			ppd->cc_mad_idx = 0;
3470 		/*
3471 		 * Entries which are older than twice the time
3472 		 * required to wrap the counter are supposed to
3473 		 * be zeroed (CA10-49 IBTA, release 1.2.1, V1).
3474 		 */
3475 		if ((u64)(ts - cce->timestamp) > (2 * UINT_MAX))
3476 			continue;
3477 		memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
3478 		memcpy(cong_log->events[i].remote_qp_number_cn_entry,
3479 		       &cce->rqpn, 3);
3480 		cong_log->events[i].sl_svc_type_cn_entry =
3481 			((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
3482 		cong_log->events[i].remote_lid_cn_entry =
3483 			cpu_to_be32(cce->rlid);
3484 		cong_log->events[i].timestamp_cn_entry =
3485 			cpu_to_be32(cce->timestamp);
3486 	}
3487 
3488 	/*
3489 	 * Reset threshold_cong_event_map and threshold_event_counter
3490 	 * to 0 when the log is read.
3491 	 */
3492 	memset(ppd->threshold_cong_event_map, 0x0,
3493 	       sizeof(ppd->threshold_cong_event_map));
3494 	ppd->threshold_event_counter = 0;
3495 
3496 	spin_unlock_irq(&ppd->cc_log_lock);
3497 
3498 	if (resp_len)
3499 		*resp_len += sizeof(struct opa_hfi1_cong_log);
3500 
3501 	return reply((struct ib_mad_hdr *)smp);
3502 }
3503 
3504 static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3505 				   struct ib_device *ibdev, u8 port,
3506 				   u32 *resp_len)
3507 {
3508 	struct ib_cc_table_attr *cc_table_attr =
3509 		(struct ib_cc_table_attr *)data;
3510 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3511 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3512 	u32 start_block = OPA_AM_START_BLK(am);
3513 	u32 n_blocks = OPA_AM_NBLK(am);
3514 	struct ib_cc_table_entry_shadow *entries;
3515 	int i, j;
3516 	u32 sentry, eentry;
3517 	struct cc_state *cc_state;
3518 
3519 	/* sanity check n_blocks, start_block */
3520 	if (n_blocks == 0 ||
3521 	    start_block + n_blocks > ppd->cc_max_table_entries) {
3522 		smp->status |= IB_SMP_INVALID_FIELD;
3523 		return reply((struct ib_mad_hdr *)smp);
3524 	}
3525 
3526 	rcu_read_lock();
3527 
3528 	cc_state = get_cc_state(ppd);
3529 
3530 	if (!cc_state) {
3531 		rcu_read_unlock();
3532 		return reply((struct ib_mad_hdr *)smp);
3533 	}
3534 
3535 	sentry = start_block * IB_CCT_ENTRIES;
3536 	eentry = sentry + (IB_CCT_ENTRIES * n_blocks);
3537 
3538 	cc_table_attr->ccti_limit = cpu_to_be16(cc_state->cct.ccti_limit);
3539 
3540 	entries = cc_state->cct.entries;
3541 
3542 	/* return n_blocks, though the last block may not be full */
3543 	for (j = 0, i = sentry; i < eentry; j++, i++)
3544 		cc_table_attr->ccti_entries[j].entry =
3545 			cpu_to_be16(entries[i].entry);
3546 
3547 	rcu_read_unlock();
3548 
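	/* the reply is ccti_limit (one u16) plus n_blocks worth of entries */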
3549 	if (resp_len)
3550 		*resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3551 
3552 	return reply((struct ib_mad_hdr *)smp);
3553 }
3554 
3555 static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3556 				   struct ib_device *ibdev, u8 port,
3557 				   u32 *resp_len)
3558 {
3559 	struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
3560 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3561 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3562 	u32 start_block = OPA_AM_START_BLK(am);
3563 	u32 n_blocks = OPA_AM_NBLK(am);
3564 	struct ib_cc_table_entry_shadow *entries;
3565 	int i, j;
3566 	u32 sentry, eentry;
3567 	u16 ccti_limit;
3568 
3569 	/* sanity check n_blocks, start_block */
3570 	if (n_blocks == 0 ||
3571 	    start_block + n_blocks > ppd->cc_max_table_entries) {
3572 		smp->status |= IB_SMP_INVALID_FIELD;
3573 		return reply((struct ib_mad_hdr *)smp);
3574 	}
3575 
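	/*
	 * The last block may be only partially populated; ccti_limit
	 * determines how many of its entries are valid.
	 */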
3576 	sentry = start_block * IB_CCT_ENTRIES;
3577 	eentry = sentry + ((n_blocks - 1) * IB_CCT_ENTRIES) +
3578 		 (be16_to_cpu(p->ccti_limit)) % IB_CCT_ENTRIES + 1;
3579 
3580 	/* sanity check ccti_limit */
3581 	ccti_limit = be16_to_cpu(p->ccti_limit);
3582 	if (ccti_limit + 1 > eentry) {
3583 		smp->status |= IB_SMP_INVALID_FIELD;
3584 		return reply((struct ib_mad_hdr *)smp);
3585 	}
3586 
3587 	/*
3588 	 * Save details from packet into the ppd.  Hold the cc_state_lock so
3589 	 * our information is consistent with anyone trying to apply the state.
3590 	 */
3591 	spin_lock(&ppd->cc_state_lock);
3592 	ppd->total_cct_entry = ccti_limit + 1;
3593 	entries = ppd->ccti_entries;
3594 	for (j = 0, i = sentry; i < eentry; j++, i++)
3595 		entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry);
3596 	spin_unlock(&ppd->cc_state_lock);
3597 
3598 	/* now apply the information */
3599 	apply_cc_state(ppd);
3600 
3601 	return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len);
3602 }
3603 
3604 struct opa_led_info {
3605 	__be32 rsvd_led_mask;
3606 	__be32 rsvd;
3607 };
3608 
3609 #define OPA_LED_SHIFT	31
3610 #define OPA_LED_MASK	BIT(OPA_LED_SHIFT)
3611 
3612 static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3613 				   struct ib_device *ibdev, u8 port,
3614 				   u32 *resp_len)
3615 {
3616 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3617 	struct hfi1_pportdata *ppd = dd->pport;
3618 	struct opa_led_info *p = (struct opa_led_info *)data;
3619 	u32 nport = OPA_AM_NPORT(am);
3620 	u32 is_beaconing_active;
3621 
3622 	if (nport != 1) {
3623 		smp->status |= IB_SMP_INVALID_FIELD;
3624 		return reply((struct ib_mad_hdr *)smp);
3625 	}
3626 
3627 	/*
3628 	 * This pairs with the memory barrier in hfi1_start_led_override to
3629 	 * ensure that we read the correct state of LED beaconing, as
3630 	 * represented by led_override_timer_active.
3631 	 */
3632 	smp_rmb();
3633 	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
3634 	p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
3635 
3636 	if (resp_len)
3637 		*resp_len += sizeof(struct opa_led_info);
3638 
3639 	return reply((struct ib_mad_hdr *)smp);
3640 }
3641 
3642 static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3643 				   struct ib_device *ibdev, u8 port,
3644 				   u32 *resp_len)
3645 {
3646 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3647 	struct opa_led_info *p = (struct opa_led_info *)data;
3648 	u32 nport = OPA_AM_NPORT(am);
3649 	int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
3650 
3651 	if (nport != 1) {
3652 		smp->status |= IB_SMP_INVALID_FIELD;
3653 		return reply((struct ib_mad_hdr *)smp);
3654 	}
3655 
3656 	if (on)
3657 		hfi1_start_led_override(dd->pport, 2000, 1500);
3658 	else
3659 		shutdown_led_override(dd->pport);
3660 
3661 	return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len);
3662 }
3663 
3664 static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
3665 			    u8 *data, struct ib_device *ibdev, u8 port,
3666 			    u32 *resp_len)
3667 {
3668 	int ret;
3669 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3670 
3671 	switch (attr_id) {
3672 	case IB_SMP_ATTR_NODE_DESC:
3673 		ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
3674 					      resp_len);
3675 		break;
3676 	case IB_SMP_ATTR_NODE_INFO:
3677 		ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
3678 					      resp_len);
3679 		break;
3680 	case IB_SMP_ATTR_PORT_INFO:
3681 		ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
3682 					      resp_len);
3683 		break;
3684 	case IB_SMP_ATTR_PKEY_TABLE:
3685 		ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
3686 					       resp_len);
3687 		break;
3688 	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
3689 		ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
3690 					      resp_len);
3691 		break;
3692 	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
3693 		ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
3694 					      resp_len);
3695 		break;
3696 	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
3697 		ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
3698 					       resp_len);
3699 		break;
3700 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
3701 		ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
3702 						resp_len);
3703 		break;
3704 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
3705 		ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
3706 					 resp_len);
3707 		break;
3708 	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
3709 		ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
3710 					 resp_len);
3711 		break;
3712 	case OPA_ATTRIB_ID_CABLE_INFO:
3713 		ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
3714 						resp_len);
3715 		break;
3716 	case IB_SMP_ATTR_VL_ARB_TABLE:
3717 		ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
3718 					    resp_len);
3719 		break;
3720 	case OPA_ATTRIB_ID_CONGESTION_INFO:
3721 		ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
3722 					       resp_len);
3723 		break;
3724 	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
3725 		ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
3726 						  port, resp_len);
3727 		break;
3728 	case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
3729 		ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
3730 						   port, resp_len);
3731 		break;
3732 	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
3733 		ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
3734 					      resp_len);
3735 		break;
3736 	case IB_SMP_ATTR_LED_INFO:
3737 		ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
3738 					      resp_len);
3739 		break;
3740 	case IB_SMP_ATTR_SM_INFO:
3741 		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
3742 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
3743 		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
3744 			return IB_MAD_RESULT_SUCCESS;
3745 		/* FALLTHROUGH */
3746 	default:
3747 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
3748 		ret = reply((struct ib_mad_hdr *)smp);
3749 		break;
3750 	}
3751 	return ret;
3752 }
3753 
3754 static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
3755 			    u8 *data, struct ib_device *ibdev, u8 port,
3756 			    u32 *resp_len)
3757 {
3758 	int ret;
3759 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3760 
3761 	switch (attr_id) {
3762 	case IB_SMP_ATTR_PORT_INFO:
3763 		ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
3764 					      resp_len);
3765 		break;
3766 	case IB_SMP_ATTR_PKEY_TABLE:
3767 		ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
3768 					       resp_len);
3769 		break;
3770 	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
3771 		ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
3772 					      resp_len);
3773 		break;
3774 	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
3775 		ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
3776 					      resp_len);
3777 		break;
3778 	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
3779 		ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
3780 					       resp_len);
3781 		break;
3782 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
3783 		ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
3784 						resp_len);
3785 		break;
3786 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
3787 		ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
3788 					 resp_len);
3789 		break;
3790 	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
3791 		ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
3792 					 resp_len);
3793 		break;
3794 	case IB_SMP_ATTR_VL_ARB_TABLE:
3795 		ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
3796 					    resp_len);
3797 		break;
3798 	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
3799 		ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
3800 						  port, resp_len);
3801 		break;
3802 	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
3803 		ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
3804 					      resp_len);
3805 		break;
3806 	case IB_SMP_ATTR_LED_INFO:
3807 		ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
3808 					      resp_len);
3809 		break;
3810 	case IB_SMP_ATTR_SM_INFO:
3811 		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
3812 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
3813 		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
3814 			return IB_MAD_RESULT_SUCCESS;
3815 		/* FALLTHROUGH */
3816 	default:
3817 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
3818 		ret = reply((struct ib_mad_hdr *)smp);
3819 		break;
3820 	}
3821 	return ret;
3822 }
3823 
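/* Flag an error in an aggregate segment by setting bit 15 of err_reqlength. */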
3824 static inline void set_aggr_error(struct opa_aggregate *ag)
3825 {
3826 	ag->err_reqlength |= cpu_to_be16(0x8000);
3827 }
3828 
3829 static int subn_get_opa_aggregate(struct opa_smp *smp,
3830 				  struct ib_device *ibdev, u8 port,
3831 				  u32 *resp_len)
3832 {
3833 	int i;
3834 	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
3835 	u8 *next_smp = opa_get_smp_data(smp);
3836 
3837 	if (num_attr < 1 || num_attr > 117) {
3838 		smp->status |= IB_SMP_INVALID_FIELD;
3839 		return reply((struct ib_mad_hdr *)smp);
3840 	}
3841 
3842 	for (i = 0; i < num_attr; i++) {
3843 		struct opa_aggregate *agg;
3844 		size_t agg_data_len;
3845 		size_t agg_size;
3846 		u32 am;
3847 
3848 		agg = (struct opa_aggregate *)next_smp;
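		/*
		 * The low 7 bits of err_reqlength give the segment's
		 * payload length in units of 8 bytes.
		 */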
3849 		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
3850 		agg_size = sizeof(*agg) + agg_data_len;
3851 		am = be32_to_cpu(agg->attr_mod);
3852 
3853 		*resp_len += agg_size;
3854 
3855 		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
3856 			smp->status |= IB_SMP_INVALID_FIELD;
3857 			return reply((struct ib_mad_hdr *)smp);
3858 		}
3859 
3860 		/* zero the payload for this segment */
3861 		memset(next_smp + sizeof(*agg), 0, agg_data_len);
3862 
3863 		(void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
3864 					ibdev, port, NULL);
3865 		if (smp->status & ~IB_SMP_DIRECTION) {
3866 			set_aggr_error(agg);
3867 			return reply((struct ib_mad_hdr *)smp);
3868 		}
3869 		next_smp += agg_size;
3870 	}
3871 
3872 	return reply((struct ib_mad_hdr *)smp);
3873 }
3874 
3875 static int subn_set_opa_aggregate(struct opa_smp *smp,
3876 				  struct ib_device *ibdev, u8 port,
3877 				  u32 *resp_len)
3878 {
3879 	int i;
3880 	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
3881 	u8 *next_smp = opa_get_smp_data(smp);
3882 
3883 	if (num_attr < 1 || num_attr > 117) {
3884 		smp->status |= IB_SMP_INVALID_FIELD;
3885 		return reply((struct ib_mad_hdr *)smp);
3886 	}
3887 
3888 	for (i = 0; i < num_attr; i++) {
3889 		struct opa_aggregate *agg;
3890 		size_t agg_data_len;
3891 		size_t agg_size;
3892 		u32 am;
3893 
3894 		agg = (struct opa_aggregate *)next_smp;
3895 		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
3896 		agg_size = sizeof(*agg) + agg_data_len;
3897 		am = be32_to_cpu(agg->attr_mod);
3898 
3899 		*resp_len += agg_size;
3900 
3901 		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
3902 			smp->status |= IB_SMP_INVALID_FIELD;
3903 			return reply((struct ib_mad_hdr *)smp);
3904 		}
3905 
3906 		(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
3907 					ibdev, port, NULL);
3908 		if (smp->status & ~IB_SMP_DIRECTION) {
3909 			set_aggr_error(agg);
3910 			return reply((struct ib_mad_hdr *)smp);
3911 		}
3912 		next_smp += agg_size;
3913 	}
3914 
3915 	return reply((struct ib_mad_hdr *)smp);
3916 }
3917 
3918 /*
3919  * OPAv1 specifies that, on the transition to link up, these counters
3920  * are cleared:
3921  *   PortRcvErrors [*]
3922  *   LinkErrorRecovery
3923  *   LocalLinkIntegrityErrors
3924  *   ExcessiveBufferOverruns [*]
3925  *
3926  * [*] Error info associated with these counters is retained, but the
3927  * error info status is reset to 0.
3928  */
3929 void clear_linkup_counters(struct hfi1_devdata *dd)
3930 {
3931 	/* PortRcvErrors */
3932 	write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3933 	dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3934 	/* LinkErrorRecovery */
3935 	write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3936 	write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0);
3937 	/* LocalLinkIntegrityErrors */
3938 	write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3939 	/* ExcessiveBufferOverruns */
3940 	write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3941 	dd->rcv_ovfl_cnt = 0;
3942 	dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3943 }
3944 
3945 /*
3946  * is_local_mad() returns 1 if 'mad' was sent from, and is destined to,
3947  * the local node, and 0 otherwise.
3948  */
3949 static int is_local_mad(struct hfi1_ibport *ibp, const struct opa_mad *mad,
3950 			const struct ib_wc *in_wc)
3951 {
3952 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3953 	const struct opa_smp *smp = (const struct opa_smp *)mad;
3954 
3955 	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
3956 		return (smp->hop_cnt == 0 &&
3957 			smp->route.dr.dr_slid == OPA_LID_PERMISSIVE &&
3958 			smp->route.dr.dr_dlid == OPA_LID_PERMISSIVE);
3959 	}
3960 
3961 	return (in_wc->slid == ppd->lid);
3962 }
3963 
3964 /*
3965  * opa_local_smp_check() should only be called on MADs for which
3966  * is_local_mad() returns true. It applies the SMP checks that are
3967  * specific to SMPs which are sent from, and destined to, this node.
3968  * opa_local_smp_check() returns 0 if the SMP passes its checks, 1
3969  * otherwise.
3970  *
3971  * SMPs which arrive from other nodes are instead checked by
3972  * opa_smp_check().
3973  */
3974 static int opa_local_smp_check(struct hfi1_ibport *ibp,
3975 			       const struct ib_wc *in_wc)
3976 {
3977 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3978 	u16 slid = in_wc->slid;
3979 	u16 pkey;
3980 
3981 	if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys))
3982 		return 1;
3983 
3984 	pkey = ppd->pkeys[in_wc->pkey_index];
3985 	/*
3986 	 * We need to do the "node-local" checks specified in OPAv1,
3987 	 * rev 0.90, section 9.10.26, which are:
3988 	 *   - pkey is 0x7fff, or 0xffff
3989 	 *   - Source QPN == 0 || Destination QPN == 0
3990 	 *   - the MAD header's management class is either
3991 	 *     IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE or
3992 	 *     IB_MGMT_CLASS_SUBN_LID_ROUTED
3993 	 *   - SLID != 0
3994 	 *
3995 	 * However, we know (and so don't need to check again) that,
3996 	 * for local SMPs, the MAD stack passes MADs with:
3997 	 *   - Source QPN of 0
3998 	 *   - MAD mgmt_class is IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
3999 	 *   - SLID is either OPA_LID_PERMISSIVE (0xFFFFFFFF) or
4000 	 *     our own port's LID
4001 	 *
4002 	 */
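	/* only the management P_Keys are acceptable for node-local SMPs */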
4003 	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
4004 		return 0;
4005 	ingress_pkey_table_fail(ppd, pkey, slid);
4006 	return 1;
4007 }
4008 
4009 static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
4010 			    u8 port, const struct opa_mad *in_mad,
4011 			    struct opa_mad *out_mad,
4012 			    u32 *resp_len)
4013 {
4014 	struct opa_smp *smp = (struct opa_smp *)out_mad;
4015 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4016 	u8 *data;
4017 	u32 am;
4018 	__be16 attr_id;
4019 	int ret;
4020 
4021 	*out_mad = *in_mad;
4022 	data = opa_get_smp_data(smp);
4023 
4024 	am = be32_to_cpu(smp->attr_mod);
4025 	attr_id = smp->attr_id;
4026 	if (smp->class_version != OPA_SM_CLASS_VERSION) {
4027 		smp->status |= IB_SMP_UNSUP_VERSION;
4028 		ret = reply((struct ib_mad_hdr *)smp);
4029 		return ret;
4030 	}
4031 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
4032 			 smp->route.dr.dr_slid, smp->route.dr.return_path,
4033 			 smp->hop_cnt);
4034 	if (ret) {
4035 		u32 port_num = be32_to_cpu(smp->attr_mod);
4036 
4037 		/*
4038 		 * If this is a get/set portinfo, we already check the
4039 		 * M_Key if the MAD is for another port and the M_Key
4040 		 * is OK on the receiving port. This check is needed
4041 		 * to increment the error counters when the M_Key
4042 		 * fails to match on *both* ports.
4043 		 */
4044 		if (attr_id == IB_SMP_ATTR_PORT_INFO &&
4045 		    (smp->method == IB_MGMT_METHOD_GET ||
4046 		     smp->method == IB_MGMT_METHOD_SET) &&
4047 		    port_num && port_num <= ibdev->phys_port_cnt &&
4048 		    port != port_num)
4049 			(void)check_mkey(to_iport(ibdev, port_num),
4050 					  (struct ib_mad_hdr *)smp, 0,
4051 					  smp->mkey, smp->route.dr.dr_slid,
4052 					  smp->route.dr.return_path,
4053 					  smp->hop_cnt);
4054 		ret = IB_MAD_RESULT_FAILURE;
4055 		return ret;
4056 	}
4057 
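	/* start with the header size; each handler adds its attribute size */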
4058 	*resp_len = opa_get_smp_header_size(smp);
4059 
4060 	switch (smp->method) {
4061 	case IB_MGMT_METHOD_GET:
4062 		switch (attr_id) {
4063 		default:
4064 			clear_opa_smp_data(smp);
4065 			ret = subn_get_opa_sma(attr_id, smp, am, data,
4066 					       ibdev, port, resp_len);
4067 			break;
4068 		case OPA_ATTRIB_ID_AGGREGATE:
4069 			ret = subn_get_opa_aggregate(smp, ibdev, port,
4070 						     resp_len);
4071 			break;
4072 		}
4073 		break;
4074 	case IB_MGMT_METHOD_SET:
4075 		switch (attr_id) {
4076 		default:
4077 			ret = subn_set_opa_sma(attr_id, smp, am, data,
4078 					       ibdev, port, resp_len);
4079 			break;
4080 		case OPA_ATTRIB_ID_AGGREGATE:
4081 			ret = subn_set_opa_aggregate(smp, ibdev, port,
4082 						     resp_len);
4083 			break;
4084 		}
4085 		break;
4086 	case IB_MGMT_METHOD_TRAP:
4087 	case IB_MGMT_METHOD_REPORT:
4088 	case IB_MGMT_METHOD_REPORT_RESP:
4089 	case IB_MGMT_METHOD_GET_RESP:
4090 		/*
4091 		 * The ib_mad module will call us to process responses
4092 		 * before checking for other consumers.
4093 		 * Just tell the caller to process it normally.
4094 		 */
4095 		ret = IB_MAD_RESULT_SUCCESS;
4096 		break;
4097 	default:
4098 		smp->status |= IB_SMP_UNSUP_METHOD;
4099 		ret = reply((struct ib_mad_hdr *)smp);
4100 		break;
4101 	}
4102 
4103 	return ret;
4104 }
4105 
4106 static int process_subn(struct ib_device *ibdev, int mad_flags,
4107 			u8 port, const struct ib_mad *in_mad,
4108 			struct ib_mad *out_mad)
4109 {
4110 	struct ib_smp *smp = (struct ib_smp *)out_mad;
4111 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4112 	int ret;
4113 
4114 	*out_mad = *in_mad;
4115 	if (smp->class_version != 1) {
4116 		smp->status |= IB_SMP_UNSUP_VERSION;
4117 		ret = reply((struct ib_mad_hdr *)smp);
4118 		return ret;
4119 	}
4120 
4121 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
4122 			 smp->mkey, (__force __be32)smp->dr_slid,
4123 			 smp->return_path, smp->hop_cnt);
4124 	if (ret) {
4125 		u32 port_num = be32_to_cpu(smp->attr_mod);
4126 
4127 		/*
4128 		 * If this is a get/set portinfo, we already check the
4129 		 * M_Key if the MAD is for another port and the M_Key
4130 		 * is OK on the receiving port. This check is needed
4131 		 * to increment the error counters when the M_Key
4132 		 * fails to match on *both* ports.
4133 		 */
4134 		if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
4135 		    (smp->method == IB_MGMT_METHOD_GET ||
4136 		     smp->method == IB_MGMT_METHOD_SET) &&
4137 		    port_num && port_num <= ibdev->phys_port_cnt &&
4138 		    port != port_num)
4139 			(void)check_mkey(to_iport(ibdev, port_num),
4140 					 (struct ib_mad_hdr *)smp, 0,
4141 					 smp->mkey,
4142 					 (__force __be32)smp->dr_slid,
4143 					 smp->return_path, smp->hop_cnt);
4144 		ret = IB_MAD_RESULT_FAILURE;
4145 		return ret;
4146 	}
4147 
4148 	switch (smp->method) {
4149 	case IB_MGMT_METHOD_GET:
4150 		switch (smp->attr_id) {
4151 		case IB_SMP_ATTR_NODE_INFO:
4152 			ret = subn_get_nodeinfo(smp, ibdev, port);
4153 			break;
4154 		default:
4155 			smp->status |= IB_SMP_UNSUP_METH_ATTR;
4156 			ret = reply((struct ib_mad_hdr *)smp);
4157 			break;
4158 		}
4159 		break;
4160 	}
4161 
4162 	return ret;
4163 }
4164 
4165 static int process_perf(struct ib_device *ibdev, u8 port,
4166 			const struct ib_mad *in_mad,
4167 			struct ib_mad *out_mad)
4168 {
4169 	struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
4170 	struct ib_class_port_info *cpi = (struct ib_class_port_info *)
4171 						&pmp->data;
4172 	int ret = IB_MAD_RESULT_FAILURE;
4173 
4174 	*out_mad = *in_mad;
4175 	if (pmp->mad_hdr.class_version != 1) {
4176 		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4177 		ret = reply((struct ib_mad_hdr *)pmp);
4178 		return ret;
4179 	}
4180 
4181 	switch (pmp->mad_hdr.method) {
4182 	case IB_MGMT_METHOD_GET:
4183 		switch (pmp->mad_hdr.attr_id) {
4184 		case IB_PMA_PORT_COUNTERS:
4185 			ret = pma_get_ib_portcounters(pmp, ibdev, port);
4186 			break;
4187 		case IB_PMA_PORT_COUNTERS_EXT:
4188 			ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
4189 			break;
4190 		case IB_PMA_CLASS_PORT_INFO:
4191 			cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
4192 			ret = reply((struct ib_mad_hdr *)pmp);
4193 			break;
4194 		default:
4195 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4196 			ret = reply((struct ib_mad_hdr *)pmp);
4197 			break;
4198 		}
4199 		break;
4200 
4201 	case IB_MGMT_METHOD_SET:
4202 		if (pmp->mad_hdr.attr_id) {
4203 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4204 			ret = reply((struct ib_mad_hdr *)pmp);
4205 		}
4206 		break;
4207 
4208 	case IB_MGMT_METHOD_TRAP:
4209 	case IB_MGMT_METHOD_GET_RESP:
4210 		/*
4211 		 * The ib_mad module will call us to process responses
4212 		 * before checking for other consumers.
4213 		 * Just tell the caller to process it normally.
4214 		 */
4215 		ret = IB_MAD_RESULT_SUCCESS;
4216 		break;
4217 
4218 	default:
4219 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4220 		ret = reply((struct ib_mad_hdr *)pmp);
4221 		break;
4222 	}
4223 
4224 	return ret;
4225 }
4226 
4227 static int process_perf_opa(struct ib_device *ibdev, u8 port,
4228 			    const struct opa_mad *in_mad,
4229 			    struct opa_mad *out_mad, u32 *resp_len)
4230 {
4231 	struct opa_pma_mad *pmp = (struct opa_pma_mad *)out_mad;
4232 	int ret;
4233 
4234 	*out_mad = *in_mad;
4235 
4236 	if (pmp->mad_hdr.class_version != OPA_SM_CLASS_VERSION) {
4237 		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4238 		return reply((struct ib_mad_hdr *)pmp);
4239 	}
4240 
4241 	*resp_len = sizeof(pmp->mad_hdr);
4242 
4243 	switch (pmp->mad_hdr.method) {
4244 	case IB_MGMT_METHOD_GET:
4245 		switch (pmp->mad_hdr.attr_id) {
4246 		case IB_PMA_CLASS_PORT_INFO:
4247 			ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
4248 			break;
4249 		case OPA_PM_ATTRIB_ID_PORT_STATUS:
4250 			ret = pma_get_opa_portstatus(pmp, ibdev, port,
4251 						     resp_len);
4252 			break;
4253 		case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
4254 			ret = pma_get_opa_datacounters(pmp, ibdev, port,
4255 						       resp_len);
4256 			break;
4257 		case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
4258 			ret = pma_get_opa_porterrors(pmp, ibdev, port,
4259 						     resp_len);
4260 			break;
4261 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
4262 			ret = pma_get_opa_errorinfo(pmp, ibdev, port,
4263 						    resp_len);
4264 			break;
4265 		default:
4266 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4267 			ret = reply((struct ib_mad_hdr *)pmp);
4268 			break;
4269 		}
4270 		break;
4271 
4272 	case IB_MGMT_METHOD_SET:
4273 		switch (pmp->mad_hdr.attr_id) {
4274 		case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
4275 			ret = pma_set_opa_portstatus(pmp, ibdev, port,
4276 						     resp_len);
4277 			break;
4278 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
4279 			ret = pma_set_opa_errorinfo(pmp, ibdev, port,
4280 						    resp_len);
4281 			break;
4282 		default:
4283 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4284 			ret = reply((struct ib_mad_hdr *)pmp);
4285 			break;
4286 		}
4287 		break;
4288 
4289 	case IB_MGMT_METHOD_TRAP:
4290 	case IB_MGMT_METHOD_GET_RESP:
4291 		/*
4292 		 * The ib_mad module will call us to process responses
4293 		 * before checking for other consumers.
4294 		 * Just tell the caller to process it normally.
4295 		 */
4296 		ret = IB_MAD_RESULT_SUCCESS;
4297 		break;
4298 
4299 	default:
4300 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4301 		ret = reply((struct ib_mad_hdr *)pmp);
4302 		break;
4303 	}
4304 
4305 	return ret;
4306 }
4307 
4308 static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
4309 				u8 port, const struct ib_wc *in_wc,
4310 				const struct ib_grh *in_grh,
4311 				const struct opa_mad *in_mad,
4312 				struct opa_mad *out_mad, size_t *out_mad_size,
4313 				u16 *out_mad_pkey_index)
4314 {
4315 	int ret;
4316 	int pkey_idx;
4317 	u32 resp_len = 0;
4318 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4319 
4320 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
4321 	if (pkey_idx < 0) {
4322 		pr_warn("failed to find limited mgmt pkey, defaulting 0x%x\n",
4323 			hfi1_get_pkey(ibp, 1));
4324 		pkey_idx = 1;
4325 	}
4326 	*out_mad_pkey_index = (u16)pkey_idx;
4327 
4328 	switch (in_mad->mad_hdr.mgmt_class) {
4329 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4330 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4331 		if (is_local_mad(ibp, in_mad, in_wc)) {
4332 			ret = opa_local_smp_check(ibp, in_wc);
4333 			if (ret)
4334 				return IB_MAD_RESULT_FAILURE;
4335 		}
4336 		ret = process_subn_opa(ibdev, mad_flags, port, in_mad,
4337 				       out_mad, &resp_len);
4338 		goto bail;
4339 	case IB_MGMT_CLASS_PERF_MGMT:
4340 		ret = process_perf_opa(ibdev, port, in_mad, out_mad,
4341 				       &resp_len);
4342 		goto bail;
4343 
4344 	default:
4345 		ret = IB_MAD_RESULT_SUCCESS;
4346 	}
4347 
4348 bail:
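	/* OPA replies are rounded up to a multiple of 8 bytes */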
4349 	if (ret & IB_MAD_RESULT_REPLY)
4350 		*out_mad_size = round_up(resp_len, 8);
4351 	else if (ret & IB_MAD_RESULT_SUCCESS)
4352 		*out_mad_size = in_wc->byte_len - sizeof(struct ib_grh);
4353 
4354 	return ret;
4355 }
4356 
4357 static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4358 			       const struct ib_wc *in_wc,
4359 			       const struct ib_grh *in_grh,
4360 			       const struct ib_mad *in_mad,
4361 			       struct ib_mad *out_mad)
4362 {
4363 	int ret;
4364 
4365 	switch (in_mad->mad_hdr.mgmt_class) {
4366 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4367 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4368 		ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
4369 		break;
4370 	case IB_MGMT_CLASS_PERF_MGMT:
4371 		ret = process_perf(ibdev, port, in_mad, out_mad);
4372 		break;
4373 	default:
4374 		ret = IB_MAD_RESULT_SUCCESS;
4375 		break;
4376 	}
4377 
4378 	return ret;
4379 }
4380 
4381 /**
4382  * hfi1_process_mad - process an incoming MAD packet
4383  * @ibdev: the infiniband device this packet came in on
4384  * @mad_flags: MAD flags
4385  * @port: the port number this packet came in on
4386  * @in_wc: the work completion entry for this packet
4387  * @in_grh: the global route header for this packet
4388  * @in_mad: the incoming MAD
4389  * @out_mad: any outgoing MAD reply
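 * @in_mad_size: size of the incoming MAD
 * @out_mad_size: size of the outgoing MAD reply
 * @out_mad_pkey_index: P_Key index to use for the outgoing MAD reply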
4390  *
4391  * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
4392  * interested in processing, so that the ib_mad module handles it normally.
4393  *
4394  * Note that the verbs framework has already done the MAD sanity checks,
4395  * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4396  * MADs.
4397  *
4398  * This is called by the ib_mad module.
4399  */
4400 int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4401 		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
4402 		     const struct ib_mad_hdr *in_mad, size_t in_mad_size,
4403 		     struct ib_mad_hdr *out_mad, size_t *out_mad_size,
4404 		     u16 *out_mad_pkey_index)
4405 {
4406 	switch (in_mad->base_version) {
4407 	case OPA_MGMT_BASE_VERSION:
4408 		if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
4409 			dev_err(ibdev->dev.parent, "invalid in_mad_size\n");
4410 			return IB_MAD_RESULT_FAILURE;
4411 		}
4412 		return hfi1_process_opa_mad(ibdev, mad_flags, port,
4413 					    in_wc, in_grh,
4414 					    (struct opa_mad *)in_mad,
4415 					    (struct opa_mad *)out_mad,
4416 					    out_mad_size,
4417 					    out_mad_pkey_index);
4418 	case IB_MGMT_BASE_VERSION:
4419 		return hfi1_process_ib_mad(ibdev, mad_flags, port,
4420 					  in_wc, in_grh,
4421 					  (const struct ib_mad *)in_mad,
4422 					  (struct ib_mad *)out_mad);
4423 	default:
4424 		break;
4425 	}
4426 
4427 	return IB_MAD_RESULT_FAILURE;
4428 }
4429