xref: /openbmc/linux/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c (revision 05cf4fe738242183f1237f1b3a28b4479348c0a1)
1 /*
2  * Copyright(c) 2017 Intel Corporation.
3  *
4  * This file is provided under a dual BSD/GPLv2 license.  When using or
5  * redistributing this file, you may do so under either license.
6  *
7  * GPL LICENSE SUMMARY
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of version 2 of the GNU General Public License as
11  * published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * BSD LICENSE
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  *
24  *  - Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  *  - Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in
28  *    the documentation and/or other materials provided with the
29  *    distribution.
30  *  - Neither the name of Intel Corporation nor the names of its
31  *    contributors may be used to endorse or promote products derived
32  *    from this software without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  *
46  */
47 
48 /*
49  * This file contains OPA Virtual Network Interface Controller (VNIC)
50  * Ethernet Management Agent (EMA) driver
51  */
52 
53 #include <linux/module.h>
54 #include <rdma/ib_addr.h>
55 #include <rdma/ib_verbs.h>
56 #include <rdma/opa_smi.h>
57 #include <rdma/opa_port_info.h>
58 
59 #include "opa_vnic_internal.h"
60 
/* Driver identification strings */
#define DRV_VERSION "1.0"
char opa_vnic_driver_name[] = "opa_vnic";
const char opa_vnic_driver_version[] = DRV_VERSION;

/*
 * Extract the trap service level from the trap_sl_rsvd field of the
 * class port info MAD; the SL occupies bits 3 to 7 of that field.
 */
#define GET_TRAP_SL_FROM_CLASS_PORT_INFO(x)  (((x) >> 3) & 0x1f)

/* Upper bound on the number of traps sent in one burst */
#define OPA_VNIC_TRAP_BURST_LIMIT 4

/*
 * VNIC trap rate limit timeout, in microseconds (the trap sender
 * converts it with usecs_to_jiffies()).
 * It is derived from the class port info response time,
 * 4.096 usec * 2^18 (about 1.0737 secs); see IB spec
 * 13.4.6.2.1 PortInfoSubnetTimeout and 13.4.9 Traps.
 */
#define OPA_VNIC_TRAP_TIMEOUT  ((4096 * (1UL << 18)) / 1000)

/* MAD status: unsupported method/attribute combination (big endian) */
#define OPA_VNIC_UNSUP_ATTR  \
		cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB)

/* MAD status: invalid attribute value (big endian) */
#define OPA_VNIC_INVAL_ATTR  \
		cpu_to_be16(IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE)

/* Class capability mask bit: agent generates traps */
#define OPA_VNIC_CLASS_CAP_TRAP   0x1

/* Maximum number of VNIC ports supported */
#define OPA_VNIC_MAX_NUM_VPORT    255
92 
93 /**
94  * struct opa_vnic_vema_port -- VNIC VEMA port details
95  * @cport: pointer to port
96  * @mad_agent: pointer to mad agent for port
97  * @class_port_info: Class port info information.
98  * @tid: Transaction id
99  * @port_num: OPA port number
100  * @vport_idr: vnic ports idr
101  * @event_handler: ib event handler
102  * @lock: adapter interface lock
103  */
104 struct opa_vnic_vema_port {
105 	struct opa_vnic_ctrl_port      *cport;
106 	struct ib_mad_agent            *mad_agent;
107 	struct opa_class_port_info      class_port_info;
108 	u64                             tid;
109 	u8                              port_num;
110 	struct idr                      vport_idr;
111 	struct ib_event_handler         event_handler;
112 
113 	/* Lock to query/update network adapter */
114 	struct mutex                    lock;
115 };
116 
117 static void opa_vnic_vema_add_one(struct ib_device *device);
118 static void opa_vnic_vema_rem_one(struct ib_device *device,
119 				  void *client_data);
120 
121 static struct ib_client opa_vnic_client = {
122 	.name   = opa_vnic_driver_name,
123 	.add    = opa_vnic_vema_add_one,
124 	.remove = opa_vnic_vema_rem_one,
125 };
126 
127 /**
128  * vema_get_vport_num -- Get the vnic from the mad
129  * @recvd_mad:  Received mad
130  *
131  * Return: returns value of the vnic port number
132  */
133 static inline u8 vema_get_vport_num(struct opa_vnic_vema_mad *recvd_mad)
134 {
135 	return be32_to_cpu(recvd_mad->mad_hdr.attr_mod) & 0xff;
136 }
137 
138 /**
139  * vema_get_vport_adapter -- Get vnic port adapter from recvd mad
140  * @recvd_mad: received mad
141  * @port: ptr to port struct on which MAD was recvd
142  *
143  * Return: vnic adapter
144  */
145 static inline struct opa_vnic_adapter *
146 vema_get_vport_adapter(struct opa_vnic_vema_mad *recvd_mad,
147 		       struct opa_vnic_vema_port *port)
148 {
149 	u8 vport_num = vema_get_vport_num(recvd_mad);
150 
151 	return idr_find(&port->vport_idr, vport_num);
152 }
153 
154 /**
155  * vema_mac_tbl_req_ok -- Check if mac request has correct values
156  * @mac_tbl: mac table
157  *
158  * This function checks for the validity of the offset and number of
159  * entries required.
160  *
161  * Return: true if offset and num_entries are valid
162  */
163 static inline bool vema_mac_tbl_req_ok(struct opa_veswport_mactable *mac_tbl)
164 {
165 	u16 offset, num_entries;
166 	u16 req_entries = ((OPA_VNIC_EMA_DATA - sizeof(*mac_tbl)) /
167 			   sizeof(mac_tbl->tbl_entries[0]));
168 
169 	offset = be16_to_cpu(mac_tbl->offset);
170 	num_entries = be16_to_cpu(mac_tbl->num_entries);
171 
172 	return ((num_entries <= req_entries) &&
173 		(offset + num_entries <= OPA_VNIC_MAC_TBL_MAX_ENTRIES));
174 }
175 
176 /*
177  * Return the power on default values in the port info structure
178  * in big endian format as required by MAD.
179  */
180 static inline void vema_get_pod_values(struct opa_veswport_info *port_info)
181 {
182 	memset(port_info, 0, sizeof(*port_info));
183 	port_info->vport.max_mac_tbl_ent =
184 		cpu_to_be16(OPA_VNIC_MAC_TBL_MAX_ENTRIES);
185 	port_info->vport.max_smac_ent =
186 		cpu_to_be16(OPA_VNIC_MAX_SMAC_LIMIT);
187 	port_info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL;
188 	port_info->vport.config_state = OPA_VNIC_STATE_DROP_ALL;
189 	port_info->vesw.eth_mtu = cpu_to_be16(ETH_DATA_LEN);
190 }
191 
192 /**
193  * vema_add_vport -- Add a new vnic port
194  * @port: ptr to opa_vnic_vema_port struct
195  * @vport_num: vnic port number (to be added)
196  *
197  * Return a pointer to the vnic adapter structure
198  */
199 static struct opa_vnic_adapter *vema_add_vport(struct opa_vnic_vema_port *port,
200 					       u8 vport_num)
201 {
202 	struct opa_vnic_ctrl_port *cport = port->cport;
203 	struct opa_vnic_adapter *adapter;
204 
205 	adapter = opa_vnic_add_netdev(cport->ibdev, port->port_num, vport_num);
206 	if (!IS_ERR(adapter)) {
207 		int rc;
208 
209 		adapter->cport = cport;
210 		rc = idr_alloc(&port->vport_idr, adapter, vport_num,
211 			       vport_num + 1, GFP_NOWAIT);
212 		if (rc < 0) {
213 			opa_vnic_rem_netdev(adapter);
214 			adapter = ERR_PTR(rc);
215 		}
216 	}
217 
218 	return adapter;
219 }
220 
221 /**
222  * vema_get_class_port_info -- Get class info for port
223  * @port:  Port on whic MAD was received
224  * @recvd_mad: pointer to the received mad
225  * @rsp_mad:   pointer to respose mad
226  *
227  * This function copies the latest class port info value set for the
228  * port and stores it for generating traps
229  */
230 static void vema_get_class_port_info(struct opa_vnic_vema_port *port,
231 				     struct opa_vnic_vema_mad *recvd_mad,
232 				     struct opa_vnic_vema_mad *rsp_mad)
233 {
234 	struct opa_class_port_info *port_info;
235 
236 	port_info = (struct opa_class_port_info *)rsp_mad->data;
237 	memcpy(port_info, &port->class_port_info, sizeof(*port_info));
238 	port_info->base_version = OPA_MGMT_BASE_VERSION,
239 	port_info->class_version = OPA_EMA_CLASS_VERSION;
240 
241 	/*
242 	 * Set capability mask bit indicating agent generates traps,
243 	 * and set the maximum number of VNIC ports supported.
244 	 */
245 	port_info->cap_mask = cpu_to_be16((OPA_VNIC_CLASS_CAP_TRAP |
246 					   (OPA_VNIC_MAX_NUM_VPORT << 8)));
247 
248 	/*
249 	 * Since a get routine is always sent by the EM first we
250 	 * set the expected response time to
251 	 * 4.096 usec * 2^18 == 1.0737 sec here.
252 	 */
253 	port_info->cap_mask2_resp_time = cpu_to_be32(18);
254 }
255 
256 /**
257  * vema_set_class_port_info -- Get class info for port
258  * @port:  Port on whic MAD was received
259  * @recvd_mad: pointer to the received mad
260  * @rsp_mad:   pointer to respose mad
261  *
262  * This function updates the port class info for the specific vnic
263  * and sets up the response mad data
264  */
265 static void vema_set_class_port_info(struct opa_vnic_vema_port *port,
266 				     struct opa_vnic_vema_mad *recvd_mad,
267 				     struct opa_vnic_vema_mad *rsp_mad)
268 {
269 	memcpy(&port->class_port_info, recvd_mad->data,
270 	       sizeof(port->class_port_info));
271 
272 	vema_get_class_port_info(port, recvd_mad, rsp_mad);
273 }
274 
275 /**
276  * vema_get_veswport_info -- Get veswport info
277  * @port:      source port on which MAD was received
278  * @recvd_mad: pointer to the received mad
279  * @rsp_mad:   pointer to respose mad
280  */
281 static void vema_get_veswport_info(struct opa_vnic_vema_port *port,
282 				   struct opa_vnic_vema_mad *recvd_mad,
283 				   struct opa_vnic_vema_mad *rsp_mad)
284 {
285 	struct opa_veswport_info *port_info =
286 				  (struct opa_veswport_info *)rsp_mad->data;
287 	struct opa_vnic_adapter *adapter;
288 
289 	adapter = vema_get_vport_adapter(recvd_mad, port);
290 	if (adapter) {
291 		memset(port_info, 0, sizeof(*port_info));
292 		opa_vnic_get_vesw_info(adapter, &port_info->vesw);
293 		opa_vnic_get_per_veswport_info(adapter,
294 					       &port_info->vport);
295 	} else {
296 		vema_get_pod_values(port_info);
297 	}
298 }
299 
300 /**
301  * vema_set_veswport_info -- Set veswport info
302  * @port:      source port on which MAD was received
303  * @recvd_mad: pointer to the received mad
304  * @rsp_mad:   pointer to respose mad
305  *
306  * This function gets the port class infor for vnic
307  */
308 static void vema_set_veswport_info(struct opa_vnic_vema_port *port,
309 				   struct opa_vnic_vema_mad *recvd_mad,
310 				   struct opa_vnic_vema_mad *rsp_mad)
311 {
312 	struct opa_vnic_ctrl_port *cport = port->cport;
313 	struct opa_veswport_info *port_info;
314 	struct opa_vnic_adapter *adapter;
315 	u8 vport_num;
316 
317 	vport_num = vema_get_vport_num(recvd_mad);
318 
319 	adapter = vema_get_vport_adapter(recvd_mad, port);
320 	if (!adapter) {
321 		adapter = vema_add_vport(port, vport_num);
322 		if (IS_ERR(adapter)) {
323 			c_err("failed to add vport %d: %ld\n",
324 			      vport_num, PTR_ERR(adapter));
325 			goto err_exit;
326 		}
327 	}
328 
329 	port_info = (struct opa_veswport_info *)recvd_mad->data;
330 	opa_vnic_set_vesw_info(adapter, &port_info->vesw);
331 	opa_vnic_set_per_veswport_info(adapter, &port_info->vport);
332 
333 	/* Process the new config settings */
334 	opa_vnic_process_vema_config(adapter);
335 
336 	vema_get_veswport_info(port, recvd_mad, rsp_mad);
337 	return;
338 
339 err_exit:
340 	rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
341 }
342 
343 /**
344  * vema_get_mac_entries -- Get MAC entries in VNIC MAC table
345  * @port:      source port on which MAD was received
346  * @recvd_mad: pointer to the received mad
347  * @rsp_mad:   pointer to respose mad
348  *
349  * This function gets the MAC entries that are programmed into
350  * the VNIC MAC forwarding table. It checks for the validity of
351  * the index into the MAC table and the number of entries that
352  * are to be retrieved.
353  */
354 static void vema_get_mac_entries(struct opa_vnic_vema_port *port,
355 				 struct opa_vnic_vema_mad *recvd_mad,
356 				 struct opa_vnic_vema_mad *rsp_mad)
357 {
358 	struct opa_veswport_mactable *mac_tbl_in, *mac_tbl_out;
359 	struct opa_vnic_adapter *adapter;
360 
361 	adapter = vema_get_vport_adapter(recvd_mad, port);
362 	if (!adapter) {
363 		rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
364 		return;
365 	}
366 
367 	mac_tbl_in = (struct opa_veswport_mactable *)recvd_mad->data;
368 	mac_tbl_out = (struct opa_veswport_mactable *)rsp_mad->data;
369 
370 	if (vema_mac_tbl_req_ok(mac_tbl_in)) {
371 		mac_tbl_out->offset = mac_tbl_in->offset;
372 		mac_tbl_out->num_entries = mac_tbl_in->num_entries;
373 		opa_vnic_query_mac_tbl(adapter, mac_tbl_out);
374 	} else {
375 		rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
376 	}
377 }
378 
379 /**
380  * vema_set_mac_entries -- Set MAC entries in VNIC MAC table
381  * @port:      source port on which MAD was received
382  * @recvd_mad: pointer to the received mad
383  * @rsp_mad:   pointer to respose mad
384  *
385  * This function sets the MAC entries in the VNIC forwarding table
386  * It checks for the validity of the index and the number of forwarding
387  * table entries to be programmed.
388  */
389 static void vema_set_mac_entries(struct opa_vnic_vema_port *port,
390 				 struct opa_vnic_vema_mad *recvd_mad,
391 				 struct opa_vnic_vema_mad *rsp_mad)
392 {
393 	struct opa_veswport_mactable *mac_tbl;
394 	struct opa_vnic_adapter *adapter;
395 
396 	adapter = vema_get_vport_adapter(recvd_mad, port);
397 	if (!adapter) {
398 		rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
399 		return;
400 	}
401 
402 	mac_tbl = (struct opa_veswport_mactable *)recvd_mad->data;
403 	if (vema_mac_tbl_req_ok(mac_tbl)) {
404 		if (opa_vnic_update_mac_tbl(adapter, mac_tbl))
405 			rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
406 	} else {
407 		rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
408 	}
409 	vema_get_mac_entries(port, recvd_mad, rsp_mad);
410 }
411 
412 /**
413  * vema_set_delete_vesw -- Reset VESW info to POD values
414  * @port:      source port on which MAD was received
415  * @recvd_mad: pointer to the received mad
416  * @rsp_mad:   pointer to respose mad
417  *
418  * This function clears all the fields of veswport info for the requested vesw
419  * and sets them back to the power-on default values. It does not delete the
420  * vesw.
421  */
422 static void vema_set_delete_vesw(struct opa_vnic_vema_port *port,
423 				 struct opa_vnic_vema_mad *recvd_mad,
424 				 struct opa_vnic_vema_mad *rsp_mad)
425 {
426 	struct opa_veswport_info *port_info =
427 				  (struct opa_veswport_info *)rsp_mad->data;
428 	struct opa_vnic_adapter *adapter;
429 
430 	adapter = vema_get_vport_adapter(recvd_mad, port);
431 	if (!adapter) {
432 		rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
433 		return;
434 	}
435 
436 	vema_get_pod_values(port_info);
437 	opa_vnic_set_vesw_info(adapter, &port_info->vesw);
438 	opa_vnic_set_per_veswport_info(adapter, &port_info->vport);
439 
440 	/* Process the new config settings */
441 	opa_vnic_process_vema_config(adapter);
442 
443 	opa_vnic_release_mac_tbl(adapter);
444 
445 	vema_get_veswport_info(port, recvd_mad, rsp_mad);
446 }
447 
448 /**
449  * vema_get_mac_list -- Get the unicast/multicast macs.
450  * @port:      source port on which MAD was received
451  * @recvd_mad: Received mad contains fields to set vnic parameters
452  * @rsp_mad:   Response mad to be built
453  * @attr_id:   Attribute ID indicating multicast or unicast mac list
454  */
455 static void vema_get_mac_list(struct opa_vnic_vema_port *port,
456 			      struct opa_vnic_vema_mad *recvd_mad,
457 			      struct opa_vnic_vema_mad *rsp_mad,
458 			      u16 attr_id)
459 {
460 	struct opa_veswport_iface_macs *macs_in, *macs_out;
461 	int max_entries = (OPA_VNIC_EMA_DATA - sizeof(*macs_out)) / ETH_ALEN;
462 	struct opa_vnic_adapter *adapter;
463 
464 	adapter = vema_get_vport_adapter(recvd_mad, port);
465 	if (!adapter) {
466 		rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
467 		return;
468 	}
469 
470 	macs_in = (struct opa_veswport_iface_macs *)recvd_mad->data;
471 	macs_out = (struct opa_veswport_iface_macs *)rsp_mad->data;
472 
473 	macs_out->start_idx = macs_in->start_idx;
474 	if (macs_in->num_macs_in_msg)
475 		macs_out->num_macs_in_msg = macs_in->num_macs_in_msg;
476 	else
477 		macs_out->num_macs_in_msg = cpu_to_be16(max_entries);
478 
479 	if (attr_id == OPA_EM_ATTR_IFACE_MCAST_MACS)
480 		opa_vnic_query_mcast_macs(adapter, macs_out);
481 	else
482 		opa_vnic_query_ucast_macs(adapter, macs_out);
483 }
484 
485 /**
486  * vema_get_summary_counters -- Gets summary counters.
487  * @port:      source port on which MAD was received
488  * @recvd_mad: Received mad contains fields to set vnic parameters
489  * @rsp_mad:   Response mad to be built
490  */
491 static void vema_get_summary_counters(struct opa_vnic_vema_port *port,
492 				      struct opa_vnic_vema_mad *recvd_mad,
493 				      struct opa_vnic_vema_mad *rsp_mad)
494 {
495 	struct opa_veswport_summary_counters *cntrs;
496 	struct opa_vnic_adapter *adapter;
497 
498 	adapter = vema_get_vport_adapter(recvd_mad, port);
499 	if (adapter) {
500 		cntrs = (struct opa_veswport_summary_counters *)rsp_mad->data;
501 		opa_vnic_get_summary_counters(adapter, cntrs);
502 	} else {
503 		rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
504 	}
505 }
506 
507 /**
508  * vema_get_error_counters -- Gets summary counters.
509  * @port:      source port on which MAD was received
510  * @recvd_mad: Received mad contains fields to set vnic parameters
511  * @rsp_mad:   Response mad to be built
512  */
513 static void vema_get_error_counters(struct opa_vnic_vema_port *port,
514 				    struct opa_vnic_vema_mad *recvd_mad,
515 				    struct opa_vnic_vema_mad *rsp_mad)
516 {
517 	struct opa_veswport_error_counters *cntrs;
518 	struct opa_vnic_adapter *adapter;
519 
520 	adapter = vema_get_vport_adapter(recvd_mad, port);
521 	if (adapter) {
522 		cntrs = (struct opa_veswport_error_counters *)rsp_mad->data;
523 		opa_vnic_get_error_counters(adapter, cntrs);
524 	} else {
525 		rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
526 	}
527 }
528 
529 /**
530  * vema_get -- Process received get MAD
531  * @port:      source port on which MAD was received
532  * @recvd_mad: Received mad
533  * @rsp_mad:   Response mad to be built
534  */
535 static void vema_get(struct opa_vnic_vema_port *port,
536 		     struct opa_vnic_vema_mad *recvd_mad,
537 		     struct opa_vnic_vema_mad *rsp_mad)
538 {
539 	u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id);
540 
541 	switch (attr_id) {
542 	case OPA_EM_ATTR_CLASS_PORT_INFO:
543 		vema_get_class_port_info(port, recvd_mad, rsp_mad);
544 		break;
545 	case OPA_EM_ATTR_VESWPORT_INFO:
546 		vema_get_veswport_info(port, recvd_mad, rsp_mad);
547 		break;
548 	case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES:
549 		vema_get_mac_entries(port, recvd_mad, rsp_mad);
550 		break;
551 	case OPA_EM_ATTR_IFACE_UCAST_MACS:
552 		/* fall through */
553 	case OPA_EM_ATTR_IFACE_MCAST_MACS:
554 		vema_get_mac_list(port, recvd_mad, rsp_mad, attr_id);
555 		break;
556 	case OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS:
557 		vema_get_summary_counters(port, recvd_mad, rsp_mad);
558 		break;
559 	case OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS:
560 		vema_get_error_counters(port, recvd_mad, rsp_mad);
561 		break;
562 	default:
563 		rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
564 		break;
565 	}
566 }
567 
568 /**
569  * vema_set -- Process received set MAD
570  * @port:      source port on which MAD was received
571  * @recvd_mad: Received mad contains fields to set vnic parameters
572  * @rsp_mad:   Response mad to be built
573  */
574 static void vema_set(struct opa_vnic_vema_port *port,
575 		     struct opa_vnic_vema_mad *recvd_mad,
576 		     struct opa_vnic_vema_mad *rsp_mad)
577 {
578 	u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id);
579 
580 	switch (attr_id) {
581 	case OPA_EM_ATTR_CLASS_PORT_INFO:
582 		vema_set_class_port_info(port, recvd_mad, rsp_mad);
583 		break;
584 	case OPA_EM_ATTR_VESWPORT_INFO:
585 		vema_set_veswport_info(port, recvd_mad, rsp_mad);
586 		break;
587 	case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES:
588 		vema_set_mac_entries(port, recvd_mad, rsp_mad);
589 		break;
590 	case OPA_EM_ATTR_DELETE_VESW:
591 		vema_set_delete_vesw(port, recvd_mad, rsp_mad);
592 		break;
593 	default:
594 		rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
595 		break;
596 	}
597 }
598 
599 /**
600  * vema_send -- Send handler for VEMA MAD agent
601  * @mad_agent: pointer to the mad agent
602  * @mad_wc:    pointer to mad send work completion information
603  *
604  * Free all the data structures associated with the sent MAD
605  */
606 static void vema_send(struct ib_mad_agent *mad_agent,
607 		      struct ib_mad_send_wc *mad_wc)
608 {
609 	rdma_destroy_ah(mad_wc->send_buf->ah);
610 	ib_free_send_mad(mad_wc->send_buf);
611 }
612 
613 /**
614  * vema_recv -- Recv handler for VEMA MAD agent
615  * @mad_agent: pointer to the mad agent
616  * @send_buf: Send buffer if found, else NULL
617  * @mad_wc:    pointer to mad send work completion information
618  *
619  * Handle only set and get methods and respond to other methods
620  * as unsupported. Allocate response buffer and address handle
621  * for the response MAD.
622  */
623 static void vema_recv(struct ib_mad_agent *mad_agent,
624 		      struct ib_mad_send_buf *send_buf,
625 		      struct ib_mad_recv_wc *mad_wc)
626 {
627 	struct opa_vnic_vema_port *port;
628 	struct ib_ah              *ah;
629 	struct ib_mad_send_buf    *rsp;
630 	struct opa_vnic_vema_mad  *vema_mad;
631 
632 	if (!mad_wc || !mad_wc->recv_buf.mad)
633 		return;
634 
635 	port = mad_agent->context;
636 	ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
637 				  mad_wc->recv_buf.grh, mad_agent->port_num);
638 	if (IS_ERR(ah))
639 		goto free_recv_mad;
640 
641 	rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
642 				 mad_wc->wc->pkey_index, 0,
643 				 IB_MGMT_VENDOR_HDR, OPA_VNIC_EMA_DATA,
644 				 GFP_KERNEL, OPA_MGMT_BASE_VERSION);
645 	if (IS_ERR(rsp))
646 		goto err_rsp;
647 
648 	rsp->ah = ah;
649 	vema_mad = rsp->mad;
650 	memcpy(vema_mad, mad_wc->recv_buf.mad, IB_MGMT_VENDOR_HDR);
651 	vema_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
652 	vema_mad->mad_hdr.status = 0;
653 
654 	/* Lock ensures network adapter is not removed */
655 	mutex_lock(&port->lock);
656 
657 	switch (mad_wc->recv_buf.mad->mad_hdr.method) {
658 	case IB_MGMT_METHOD_GET:
659 		vema_get(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad,
660 			 vema_mad);
661 		break;
662 	case IB_MGMT_METHOD_SET:
663 		vema_set(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad,
664 			 vema_mad);
665 		break;
666 	default:
667 		vema_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
668 		break;
669 	}
670 	mutex_unlock(&port->lock);
671 
672 	if (!ib_post_send_mad(rsp, NULL)) {
673 		/*
674 		 * with post send successful ah and send mad
675 		 * will be destroyed in send handler
676 		 */
677 		goto free_recv_mad;
678 	}
679 
680 	ib_free_send_mad(rsp);
681 
682 err_rsp:
683 	rdma_destroy_ah(ah);
684 free_recv_mad:
685 	ib_free_recv_mad(mad_wc);
686 }
687 
688 /**
689  * vema_get_port -- Gets the opa_vnic_vema_port
690  * @cport: pointer to control dev
691  * @port_num: Port number
692  *
693  * This function loops through the ports and returns
694  * the opa_vnic_vema port structure that is associated
695  * with the OPA port number
696  *
697  * Return: ptr to requested opa_vnic_vema_port strucure
698  *         if success, NULL if not
699  */
700 static struct opa_vnic_vema_port *
701 vema_get_port(struct opa_vnic_ctrl_port *cport, u8 port_num)
702 {
703 	struct opa_vnic_vema_port *port = (void *)cport + sizeof(*cport);
704 
705 	if (port_num > cport->num_ports)
706 		return NULL;
707 
708 	return port + (port_num - 1);
709 }
710 
711 /**
712  * opa_vnic_vema_send_trap -- This function sends a trap to the EM
713  * @adapter: pointer to vnic adapter
714  * @data: pointer to trap data filled by calling function
715  * @lid:  issuers lid (encap_slid from vesw_port_info)
716  *
717  * This function is called from the VNIC driver to send a trap if there
718  * is somethng the EM should be notified about. These events currently
719  * are
720  * 1) UNICAST INTERFACE MACADDRESS changes
721  * 2) MULTICAST INTERFACE MACADDRESS changes
722  * 3) ETHERNET LINK STATUS changes
723  * While allocating the send mad the remote site qpn used is 1
724  * as this is the well known QP.
725  *
726  */
727 void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
728 			     struct __opa_veswport_trap *data, u32 lid)
729 {
730 	struct opa_vnic_ctrl_port *cport = adapter->cport;
731 	struct ib_mad_send_buf *send_buf;
732 	struct opa_vnic_vema_port *port;
733 	struct ib_device *ibp;
734 	struct opa_vnic_vema_mad_trap *trap_mad;
735 	struct opa_class_port_info *class;
736 	struct rdma_ah_attr ah_attr;
737 	struct ib_ah *ah;
738 	struct opa_veswport_trap *trap;
739 	u32 trap_lid;
740 	u16 pkey_idx;
741 
742 	if (!cport)
743 		goto err_exit;
744 	ibp = cport->ibdev;
745 	port = vema_get_port(cport, data->opaportnum);
746 	if (!port || !port->mad_agent)
747 		goto err_exit;
748 
749 	if (time_before(jiffies, adapter->trap_timeout)) {
750 		if (adapter->trap_count == OPA_VNIC_TRAP_BURST_LIMIT) {
751 			v_warn("Trap rate exceeded\n");
752 			goto err_exit;
753 		} else {
754 			adapter->trap_count++;
755 		}
756 	} else {
757 		adapter->trap_count = 0;
758 	}
759 
760 	class = &port->class_port_info;
761 	/* Set up address handle */
762 	memset(&ah_attr, 0, sizeof(ah_attr));
763 	ah_attr.type = rdma_ah_find_type(ibp, port->port_num);
764 	rdma_ah_set_sl(&ah_attr,
765 		       GET_TRAP_SL_FROM_CLASS_PORT_INFO(class->trap_sl_rsvd));
766 	rdma_ah_set_port_num(&ah_attr, port->port_num);
767 	trap_lid = be32_to_cpu(class->trap_lid);
768 	/*
769 	 * check for trap lid validity, must not be zero
770 	 * The trap sink could change after we fashion the MAD but since traps
771 	 * are not guaranteed we won't use a lock as anyway the change will take
772 	 * place even with locking.
773 	 */
774 	if (!trap_lid) {
775 		c_err("%s: Invalid dlid\n", __func__);
776 		goto err_exit;
777 	}
778 
779 	rdma_ah_set_dlid(&ah_attr, trap_lid);
780 	ah = rdma_create_ah(port->mad_agent->qp->pd, &ah_attr);
781 	if (IS_ERR(ah)) {
782 		c_err("%s:Couldn't create new AH = %p\n", __func__, ah);
783 		c_err("%s:dlid = %d, sl = %d, port = %d\n", __func__,
784 		      rdma_ah_get_dlid(&ah_attr), rdma_ah_get_sl(&ah_attr),
785 		      rdma_ah_get_port_num(&ah_attr));
786 		goto err_exit;
787 	}
788 
789 	if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_FULL,
790 			 &pkey_idx) < 0) {
791 		c_err("%s:full key not found, defaulting to partial\n",
792 		      __func__);
793 		if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_PARTIAL,
794 				 &pkey_idx) < 0)
795 			pkey_idx = 1;
796 	}
797 
798 	send_buf = ib_create_send_mad(port->mad_agent, 1, pkey_idx, 0,
799 				      IB_MGMT_VENDOR_HDR, IB_MGMT_MAD_DATA,
800 				      GFP_ATOMIC, OPA_MGMT_BASE_VERSION);
801 	if (IS_ERR(send_buf)) {
802 		c_err("%s:Couldn't allocate send buf\n", __func__);
803 		goto err_sndbuf;
804 	}
805 
806 	send_buf->ah = ah;
807 
808 	/* Set up common MAD hdr */
809 	trap_mad = send_buf->mad;
810 	trap_mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION;
811 	trap_mad->mad_hdr.mgmt_class = OPA_MGMT_CLASS_INTEL_EMA;
812 	trap_mad->mad_hdr.class_version = OPA_EMA_CLASS_VERSION;
813 	trap_mad->mad_hdr.method = IB_MGMT_METHOD_TRAP;
814 	port->tid++;
815 	trap_mad->mad_hdr.tid = cpu_to_be64(port->tid);
816 	trap_mad->mad_hdr.attr_id = IB_SMP_ATTR_NOTICE;
817 
818 	/* Set up vendor OUI */
819 	trap_mad->oui[0] = INTEL_OUI_1;
820 	trap_mad->oui[1] = INTEL_OUI_2;
821 	trap_mad->oui[2] = INTEL_OUI_3;
822 
823 	/* Setup notice attribute portion */
824 	trap_mad->notice.gen_type = OPA_INTEL_EMA_NOTICE_TYPE_INFO << 1;
825 	trap_mad->notice.oui_1 = INTEL_OUI_1;
826 	trap_mad->notice.oui_2 = INTEL_OUI_2;
827 	trap_mad->notice.oui_3 = INTEL_OUI_3;
828 	trap_mad->notice.issuer_lid = cpu_to_be32(lid);
829 
830 	/* copy the actual trap data */
831 	trap = (struct opa_veswport_trap *)trap_mad->notice.raw_data;
832 	trap->fabric_id = cpu_to_be16(data->fabric_id);
833 	trap->veswid = cpu_to_be16(data->veswid);
834 	trap->veswportnum = cpu_to_be32(data->veswportnum);
835 	trap->opaportnum = cpu_to_be16(data->opaportnum);
836 	trap->veswportindex = data->veswportindex;
837 	trap->opcode = data->opcode;
838 
839 	/* If successful send set up rate limit timeout else bail */
840 	if (ib_post_send_mad(send_buf, NULL)) {
841 		ib_free_send_mad(send_buf);
842 	} else {
843 		if (adapter->trap_count)
844 			return;
845 		adapter->trap_timeout = jiffies +
846 					usecs_to_jiffies(OPA_VNIC_TRAP_TIMEOUT);
847 		return;
848 	}
849 
850 err_sndbuf:
851 	rdma_destroy_ah(ah);
852 err_exit:
853 	v_err("Aborting trap\n");
854 }
855 
/*
 * idr_for_each() callback: remove the netdev of the vnic adapter
 * stored under @id. Always returns 0.
 */
static int vema_rem_vport(int id, void *p, void *data)
{
	opa_vnic_rem_netdev((struct opa_vnic_adapter *)p);

	return 0;
}
863 
864 static int vema_enable_vport(int id, void *p, void *data)
865 {
866 	struct opa_vnic_adapter *adapter = p;
867 
868 	netif_carrier_on(adapter->netdev);
869 	return 0;
870 }
871 
872 static int vema_disable_vport(int id, void *p, void *data)
873 {
874 	struct opa_vnic_adapter *adapter = p;
875 
876 	netif_carrier_off(adapter->netdev);
877 	return 0;
878 }
879 
880 static void opa_vnic_event(struct ib_event_handler *handler,
881 			   struct ib_event *record)
882 {
883 	struct opa_vnic_vema_port *port =
884 		container_of(handler, struct opa_vnic_vema_port, event_handler);
885 	struct opa_vnic_ctrl_port *cport = port->cport;
886 
887 	if (record->element.port_num != port->port_num)
888 		return;
889 
890 	c_dbg("OPA_VNIC received event %d on device %s port %d\n",
891 	      record->event, dev_name(&record->device->dev),
892 	      record->element.port_num);
893 
894 	if (record->event == IB_EVENT_PORT_ERR)
895 		idr_for_each(&port->vport_idr, vema_disable_vport, NULL);
896 	if (record->event == IB_EVENT_PORT_ACTIVE)
897 		idr_for_each(&port->vport_idr, vema_enable_vport, NULL);
898 }
899 
900 /**
901  * vema_unregister -- Unregisters agent
902  * @cport: pointer to control port
903  *
904  * This deletes the registration by VEMA for MADs
905  */
906 static void vema_unregister(struct opa_vnic_ctrl_port *cport)
907 {
908 	int i;
909 
910 	for (i = 1; i <= cport->num_ports; i++) {
911 		struct opa_vnic_vema_port *port = vema_get_port(cport, i);
912 
913 		if (!port->mad_agent)
914 			continue;
915 
916 		/* Lock ensures no MAD is being processed */
917 		mutex_lock(&port->lock);
918 		idr_for_each(&port->vport_idr, vema_rem_vport, NULL);
919 		mutex_unlock(&port->lock);
920 
921 		ib_unregister_mad_agent(port->mad_agent);
922 		port->mad_agent = NULL;
923 		mutex_destroy(&port->lock);
924 		idr_destroy(&port->vport_idr);
925 		ib_unregister_event_handler(&port->event_handler);
926 	}
927 }
928 
929 /**
930  * vema_register -- Registers agent
931  * @cport: pointer to control port
932  *
933  * This function registers the handlers for the VEMA MADs
934  *
935  * Return: returns 0 on success. non zero otherwise
936  */
937 static int vema_register(struct opa_vnic_ctrl_port *cport)
938 {
939 	struct ib_mad_reg_req reg_req = {
940 		.mgmt_class = OPA_MGMT_CLASS_INTEL_EMA,
941 		.mgmt_class_version = OPA_MGMT_BASE_VERSION,
942 		.oui = { INTEL_OUI_1, INTEL_OUI_2, INTEL_OUI_3 }
943 	};
944 	int i;
945 
946 	set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
947 	set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
948 
949 	/* register ib event handler and mad agent for each port on dev */
950 	for (i = 1; i <= cport->num_ports; i++) {
951 		struct opa_vnic_vema_port *port = vema_get_port(cport, i);
952 		int ret;
953 
954 		port->cport = cport;
955 		port->port_num = i;
956 
957 		INIT_IB_EVENT_HANDLER(&port->event_handler,
958 				      cport->ibdev, opa_vnic_event);
959 		ib_register_event_handler(&port->event_handler);
960 
961 		idr_init(&port->vport_idr);
962 		mutex_init(&port->lock);
963 		port->mad_agent = ib_register_mad_agent(cport->ibdev, i,
964 							IB_QPT_GSI, &reg_req,
965 							IB_MGMT_RMPP_VERSION,
966 							vema_send, vema_recv,
967 							port, 0);
968 		if (IS_ERR(port->mad_agent)) {
969 			ret = PTR_ERR(port->mad_agent);
970 			port->mad_agent = NULL;
971 			mutex_destroy(&port->lock);
972 			idr_destroy(&port->vport_idr);
973 			vema_unregister(cport);
974 			return ret;
975 		}
976 	}
977 
978 	return 0;
979 }
980 
981 /**
982  * opa_vnic_ctrl_config_dev -- This function sends a trap to the EM
983  * by way of ib_modify_port to indicate support for ethernet on the
984  * fabric.
985  * @cport: pointer to control port
986  * @en: enable or disable ethernet on fabric support
987  */
988 static void opa_vnic_ctrl_config_dev(struct opa_vnic_ctrl_port *cport, bool en)
989 {
990 	struct ib_port_modify pm = { 0 };
991 	int i;
992 
993 	if (en)
994 		pm.set_port_cap_mask = OPA_CAP_MASK3_IsEthOnFabricSupported;
995 	else
996 		pm.clr_port_cap_mask = OPA_CAP_MASK3_IsEthOnFabricSupported;
997 
998 	for (i = 1; i <= cport->num_ports; i++)
999 		ib_modify_port(cport->ibdev, i, IB_PORT_OPA_MASK_CHG, &pm);
1000 }
1001 
1002 /**
1003  * opa_vnic_vema_add_one -- Handle new ib device
1004  * @device: ib device pointer
1005  *
1006  * Allocate the vnic control port and initialize it.
1007  */
1008 static void opa_vnic_vema_add_one(struct ib_device *device)
1009 {
1010 	struct opa_vnic_ctrl_port *cport;
1011 	int rc, size = sizeof(*cport);
1012 
1013 	if (!rdma_cap_opa_vnic(device))
1014 		return;
1015 
1016 	size += device->phys_port_cnt * sizeof(struct opa_vnic_vema_port);
1017 	cport = kzalloc(size, GFP_KERNEL);
1018 	if (!cport)
1019 		return;
1020 
1021 	cport->num_ports = device->phys_port_cnt;
1022 	cport->ibdev = device;
1023 
1024 	/* Initialize opa vnic management agent (vema) */
1025 	rc = vema_register(cport);
1026 	if (!rc)
1027 		c_info("VNIC client initialized\n");
1028 
1029 	ib_set_client_data(device, &opa_vnic_client, cport);
1030 	opa_vnic_ctrl_config_dev(cport, true);
1031 }
1032 
1033 /**
1034  * opa_vnic_vema_rem_one -- Handle ib device removal
1035  * @device: ib device pointer
1036  * @client_data: ib client data
1037  *
1038  * Uninitialize and free the vnic control port.
1039  */
1040 static void opa_vnic_vema_rem_one(struct ib_device *device,
1041 				  void *client_data)
1042 {
1043 	struct opa_vnic_ctrl_port *cport = client_data;
1044 
1045 	if (!cport)
1046 		return;
1047 
1048 	c_info("removing VNIC client\n");
1049 	opa_vnic_ctrl_config_dev(cport, false);
1050 	vema_unregister(cport);
1051 	kfree(cport);
1052 }
1053 
1054 static int __init opa_vnic_init(void)
1055 {
1056 	int rc;
1057 
1058 	pr_info("OPA Virtual Network Driver - v%s\n",
1059 		opa_vnic_driver_version);
1060 
1061 	rc = ib_register_client(&opa_vnic_client);
1062 	if (rc)
1063 		pr_err("VNIC driver register failed %d\n", rc);
1064 
1065 	return rc;
1066 }
1067 module_init(opa_vnic_init);
1068 
1069 static void opa_vnic_deinit(void)
1070 {
1071 	ib_unregister_client(&opa_vnic_client);
1072 }
1073 module_exit(opa_vnic_deinit);
1074 
1075 MODULE_LICENSE("Dual BSD/GPL");
1076 MODULE_AUTHOR("Intel Corporation");
1077 MODULE_DESCRIPTION("Intel OPA Virtual Network driver");
1078