xref: /openbmc/linux/drivers/infiniband/core/mad.c (revision f8523d0e83613ab8d082cd504dc53a09fbba4889)
1 /*
2  * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
4  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
5  * Copyright (c) 2009 HNR Consulting. All rights reserved.
6  * Copyright (c) 2014,2018 Intel Corporation.  All rights reserved.
7  *
8  * This software is available to you under a choice of one of two
9  * licenses.  You may choose to be licensed under the terms of the GNU
10  * General Public License (GPL) Version 2, available from the file
11  * COPYING in the main directory of this source tree, or the
12  * OpenIB.org BSD license below:
13  *
14  *     Redistribution and use in source and binary forms, with or
15  *     without modification, are permitted provided that the following
16  *     conditions are met:
17  *
18  *      - Redistributions of source code must retain the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer.
21  *
22  *      - Redistributions in binary form must reproduce the above
23  *        copyright notice, this list of conditions and the following
24  *        disclaimer in the documentation and/or other materials
25  *        provided with the distribution.
26  *
27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34  * SOFTWARE.
35  *
36  */
37 
38 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
39 
40 #include <linux/dma-mapping.h>
41 #include <linux/slab.h>
42 #include <linux/module.h>
43 #include <linux/security.h>
44 #include <linux/xarray.h>
45 #include <rdma/ib_cache.h>
46 
47 #include "mad_priv.h"
48 #include "core_priv.h"
49 #include "mad_rmpp.h"
50 #include "smi.h"
51 #include "opa_smi.h"
52 #include "agent.h"
53 
54 #define CREATE_TRACE_POINTS
55 #include <trace/events/ib_mad.h>
56 
57 #ifdef CONFIG_TRACEPOINTS
58 static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
59 			  struct ib_mad_qp_info *qp_info,
60 			  struct trace_event_raw_ib_mad_send_template *entry)
61 {
62 	u16 pkey;
63 	struct ib_device *dev = qp_info->port_priv->device;
64 	u8 pnum = qp_info->port_priv->port_num;
65 	struct ib_ud_wr *wr = &mad_send_wr->send_wr;
66 	struct rdma_ah_attr attr = {};
67 
68 	rdma_query_ah(wr->ah, &attr);
69 
70 	/* These are common */
71 	entry->sl = attr.sl;
72 	ib_query_pkey(dev, pnum, wr->pkey_index, &pkey);
73 	entry->pkey = pkey;
74 	entry->rqpn = wr->remote_qpn;
75 	entry->rqkey = wr->remote_qkey;
76 	entry->dlid = rdma_ah_get_dlid(&attr);
77 }
78 #endif
79 
/*
 * Send/receive queue depths, in work requests.  Exposed below as the
 * read-only (0444) module parameters send_queue_size / recv_queue_size.
 */
static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;

module_param_named(send_queue_size, mad_sendq_size, int, 0444);
MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");

/* Registered MAD agents, indexed by the hi_tid handed out at registration */
static DEFINE_XARRAY_ALLOC1(ib_mad_clients);
/* Cursor used by the cyclic hi_tid allocation in ib_register_mad_agent() */
static u32 ib_mad_client_next;
/* List of ib_mad_port_private entries, linked through their port_list member */
static struct list_head ib_mad_port_list;

/* Port list lock: taken around lookups in ib_mad_port_list */
static DEFINE_SPINLOCK(ib_mad_port_list_lock);
94 
/*
 * Forward declarations of file-local helpers that are referenced before
 * their definitions.
 */
static int method_in_use(struct ib_mad_mgmt_method_table **method,
			 struct ib_mad_reg_req *mad_reg_req);
static void remove_mad_reg_req(struct ib_mad_agent_private *priv);
static struct ib_mad_agent_private *find_mad_agent(
					struct ib_mad_port_private *port_priv,
					const struct ib_mad_hdr *mad);
static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
				    struct ib_mad_private *mad);
static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
static void timeout_sends(struct work_struct *work);
static void local_completions(struct work_struct *work);
static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
			      struct ib_mad_agent_private *agent_priv,
			      u8 mgmt_class);
static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
			   struct ib_mad_agent_private *agent_priv);
static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
			      struct ib_wc *wc);
static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
115 
116 /*
117  * Returns a ib_mad_port_private structure or NULL for a device/port
118  * Assumes ib_mad_port_list_lock is being held
119  */
120 static inline struct ib_mad_port_private *
121 __ib_get_mad_port(struct ib_device *device, int port_num)
122 {
123 	struct ib_mad_port_private *entry;
124 
125 	list_for_each_entry(entry, &ib_mad_port_list, port_list) {
126 		if (entry->device == device && entry->port_num == port_num)
127 			return entry;
128 	}
129 	return NULL;
130 }
131 
132 /*
133  * Wrapper function to return a ib_mad_port_private structure or NULL
134  * for a device/port
135  */
136 static inline struct ib_mad_port_private *
137 ib_get_mad_port(struct ib_device *device, int port_num)
138 {
139 	struct ib_mad_port_private *entry;
140 	unsigned long flags;
141 
142 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
143 	entry = __ib_get_mad_port(device, port_num);
144 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
145 
146 	return entry;
147 }
148 
149 static inline u8 convert_mgmt_class(u8 mgmt_class)
150 {
151 	/* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
152 	return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ?
153 		0 : mgmt_class;
154 }
155 
156 static int get_spl_qp_index(enum ib_qp_type qp_type)
157 {
158 	switch (qp_type)
159 	{
160 	case IB_QPT_SMI:
161 		return 0;
162 	case IB_QPT_GSI:
163 		return 1;
164 	default:
165 		return -1;
166 	}
167 }
168 
169 static int vendor_class_index(u8 mgmt_class)
170 {
171 	return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START;
172 }
173 
174 static int is_vendor_class(u8 mgmt_class)
175 {
176 	if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) ||
177 	    (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END))
178 		return 0;
179 	return 1;
180 }
181 
/*
 * Return 1 if any of the three bytes of @oui is non-zero, 0 if the OUI
 * is all zeroes.
 */
static int is_vendor_oui(char *oui)
{
	int i;

	for (i = 0; i < 3; i++) {
		if (oui[i])
			return 1;
	}

	return 0;
}
188 
189 static int is_vendor_method_in_use(
190 		struct ib_mad_mgmt_vendor_class *vendor_class,
191 		struct ib_mad_reg_req *mad_reg_req)
192 {
193 	struct ib_mad_mgmt_method_table *method;
194 	int i;
195 
196 	for (i = 0; i < MAX_MGMT_OUI; i++) {
197 		if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) {
198 			method = vendor_class->method_table[i];
199 			if (method) {
200 				if (method_in_use(&method, mad_reg_req))
201 					return 1;
202 				else
203 					break;
204 			}
205 		}
206 	}
207 	return 0;
208 }
209 
210 int ib_response_mad(const struct ib_mad_hdr *hdr)
211 {
212 	return ((hdr->method & IB_MGMT_METHOD_RESP) ||
213 		(hdr->method == IB_MGMT_METHOD_TRAP_REPRESS) ||
214 		((hdr->mgmt_class == IB_MGMT_CLASS_BM) &&
215 		 (hdr->attr_mod & IB_BM_ATTR_MOD_RESP)));
216 }
217 EXPORT_SYMBOL(ib_response_mad);
218 
/*
 * ib_register_mad_agent - Register to send/receive MADs
 * @device: device to register on
 * @port_num: port of @device to register on
 * @qp_type: IB_QPT_SMI or IB_QPT_GSI; any other type is rejected
 * @mad_reg_req: optional registration request naming the class, class
 *   version, OUI and methods to register.  When NULL no class is
 *   registered and a @send_handler becomes mandatory.
 * @rmpp_version: 0, or IB_MGMT_RMPP_VERSION
 * @send_handler: stored in the agent as its send handler
 * @recv_handler: stored in the agent as its receive handler; required
 *   whenever @mad_reg_req is supplied
 * @context: opaque pointer stored in the agent
 * @registration_flags: stored in the agent's flags; IB_MAD_USER_RMPP is
 *   refused when no @mad_reg_req is given
 *
 * Returns a pointer to the new agent, or an ERR_PTR():
 * -EPROTONOSUPPORT if the port lacks the capability or QP for @qp_type,
 * -EINVAL for invalid parameters or an overlapping registration,
 * -ENODEV if no MAD port is known for @device/@port_num, -ENOMEM on
 * allocation failure, or the error reported by the security setup, the
 * hi_tid allocation or the add_*_reg_req() helpers.
 *
 * Context: Process context.
 */
struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
					   u8 port_num,
					   enum ib_qp_type qp_type,
					   struct ib_mad_reg_req *mad_reg_req,
					   u8 rmpp_version,
					   ib_mad_send_handler send_handler,
					   ib_mad_recv_handler recv_handler,
					   void *context,
					   u32 registration_flags)
{
	struct ib_mad_port_private *port_priv;
	/* Default error for every "goto error1" that does not set ret itself */
	struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
	struct ib_mad_agent_private *mad_agent_priv;
	struct ib_mad_reg_req *reg_req = NULL;
	struct ib_mad_mgmt_class_table *class;
	struct ib_mad_mgmt_vendor_class_table *vendor;
	struct ib_mad_mgmt_vendor_class *vendor_class;
	struct ib_mad_mgmt_method_table *method;
	int ret2, qpn;
	u8 mgmt_class, vclass;

	if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) ||
	    (qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num)))
		return ERR_PTR(-EPROTONOSUPPORT);

	/* Validate parameters */
	qpn = get_spl_qp_index(qp_type);
	if (qpn == -1) {
		dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n",
				    __func__, qp_type);
		goto error1;
	}

	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
		dev_dbg_ratelimited(&device->dev,
				    "%s: invalid RMPP Version %u\n",
				    __func__, rmpp_version);
		goto error1;
	}

	/* Validate MAD registration request if supplied */
	if (mad_reg_req) {
		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
			dev_dbg_ratelimited(&device->dev,
					    "%s: invalid Class Version %u\n",
					    __func__,
					    mad_reg_req->mgmt_class_version);
			goto error1;
		}
		if (!recv_handler) {
			dev_dbg_ratelimited(&device->dev,
					    "%s: no recv_handler\n", __func__);
			goto error1;
		}
		if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
			/*
			 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
			 * one in this range currently allowed
			 */
			if (mad_reg_req->mgmt_class !=
			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
				dev_dbg_ratelimited(&device->dev,
					"%s: Invalid Mgmt Class 0x%x\n",
					__func__, mad_reg_req->mgmt_class);
				goto error1;
			}
		} else if (mad_reg_req->mgmt_class == 0) {
			/*
			 * Class 0 is reserved in IBA and is used for
			 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
			 */
			dev_dbg_ratelimited(&device->dev,
					    "%s: Invalid Mgmt Class 0\n",
					    __func__);
			goto error1;
		} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
			/*
			 * If class is in "new" vendor range,
			 * ensure supplied OUI is not zero
			 */
			if (!is_vendor_oui(mad_reg_req->oui)) {
				dev_dbg_ratelimited(&device->dev,
					"%s: No OUI specified for class 0x%x\n",
					__func__,
					mad_reg_req->mgmt_class);
				goto error1;
			}
		}
		/* Make sure class supplied is consistent with RMPP */
		if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
			if (rmpp_version) {
				dev_dbg_ratelimited(&device->dev,
					"%s: RMPP version for non-RMPP class 0x%x\n",
					__func__, mad_reg_req->mgmt_class);
				goto error1;
			}
		}

		/* Make sure class supplied is consistent with QP type */
		if (qp_type == IB_QPT_SMI) {
			if ((mad_reg_req->mgmt_class !=
					IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
			    (mad_reg_req->mgmt_class !=
					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
				dev_dbg_ratelimited(&device->dev,
					"%s: Invalid SM QP type: class 0x%x\n",
					__func__, mad_reg_req->mgmt_class);
				goto error1;
			}
		} else {
			if ((mad_reg_req->mgmt_class ==
					IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
			    (mad_reg_req->mgmt_class ==
					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
				dev_dbg_ratelimited(&device->dev,
					"%s: Invalid GS QP type: class 0x%x\n",
					__func__, mad_reg_req->mgmt_class);
				goto error1;
			}
		}
	} else {
		/* No registration request supplied */
		if (!send_handler)
			goto error1;
		if (registration_flags & IB_MAD_USER_RMPP)
			goto error1;
	}

	/* Validate device and port */
	port_priv = ib_get_mad_port(device, port_num);
	if (!port_priv) {
		dev_dbg_ratelimited(&device->dev, "%s: Invalid port %d\n",
				    __func__, port_num);
		ret = ERR_PTR(-ENODEV);
		goto error1;
	}

	/* Verify the QP requested is supported. For example, Ethernet devices
	 * will not have QP0.
	 */
	if (!port_priv->qp_info[qpn].qp) {
		dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n",
				    __func__, qpn);
		ret = ERR_PTR(-EPROTONOSUPPORT);
		goto error1;
	}

	/* Allocate structures */
	mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
	if (!mad_agent_priv) {
		ret = ERR_PTR(-ENOMEM);
		goto error1;
	}

	/* Keep a private copy of the request; the caller's is not retained */
	if (mad_reg_req) {
		reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL);
		if (!reg_req) {
			ret = ERR_PTR(-ENOMEM);
			goto error3;
		}
	}

	/* Now, fill in the various structures */
	mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
	mad_agent_priv->reg_req = reg_req;
	mad_agent_priv->agent.rmpp_version = rmpp_version;
	mad_agent_priv->agent.device = device;
	mad_agent_priv->agent.recv_handler = recv_handler;
	mad_agent_priv->agent.send_handler = send_handler;
	mad_agent_priv->agent.context = context;
	mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
	mad_agent_priv->agent.port_num = port_num;
	mad_agent_priv->agent.flags = registration_flags;
	spin_lock_init(&mad_agent_priv->lock);
	INIT_LIST_HEAD(&mad_agent_priv->send_list);
	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
	INIT_LIST_HEAD(&mad_agent_priv->done_list);
	INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
	INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
	INIT_LIST_HEAD(&mad_agent_priv->local_list);
	INIT_WORK(&mad_agent_priv->local_work, local_completions);
	/* Initial reference; dropped by unregister_mad_agent() */
	atomic_set(&mad_agent_priv->refcount, 1);
	init_completion(&mad_agent_priv->comp);

	ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
	if (ret2) {
		ret = ERR_PTR(ret2);
		goto error4;
	}

	/*
	 * The mlx4 driver uses the top byte to distinguish which virtual
	 * function generated the MAD, so we must avoid using it.
	 */
	ret2 = xa_alloc_cyclic(&ib_mad_clients, &mad_agent_priv->agent.hi_tid,
			mad_agent_priv, XA_LIMIT(0, (1 << 24) - 1),
			&ib_mad_client_next, GFP_KERNEL);
	if (ret2 < 0) {
		ret = ERR_PTR(ret2);
		goto error5;
	}

	/*
	 * Make sure MAD registration (if supplied)
	 * is non overlapping with any existing ones
	 */
	spin_lock_irq(&port_priv->reg_lock);
	if (mad_reg_req) {
		mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
		if (!is_vendor_class(mgmt_class)) {
			class = port_priv->version[mad_reg_req->
						   mgmt_class_version].class;
			if (class) {
				method = class->method_table[mgmt_class];
				if (method) {
					/* Overlap: ret still holds -EINVAL */
					if (method_in_use(&method,
							   mad_reg_req))
						goto error6;
				}
			}
			ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
						  mgmt_class);
		} else {
			/* "New" vendor class range */
			vendor = port_priv->version[mad_reg_req->
						    mgmt_class_version].vendor;
			if (vendor) {
				vclass = vendor_class_index(mgmt_class);
				vendor_class = vendor->vendor_class[vclass];
				if (vendor_class) {
					/* Overlap: ret still holds -EINVAL */
					if (is_vendor_method_in_use(
							vendor_class,
							mad_reg_req))
						goto error6;
				}
			}
			ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
		}
		if (ret2) {
			ret = ERR_PTR(ret2);
			goto error6;
		}
	}
	spin_unlock_irq(&port_priv->reg_lock);

	trace_ib_mad_create_agent(mad_agent_priv);
	return &mad_agent_priv->agent;
	/* Error unwinding: each label undoes one step, in reverse order */
error6:
	spin_unlock_irq(&port_priv->reg_lock);
	xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
error5:
	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
error4:
	kfree(reg_req);
error3:
	kfree(mad_agent_priv);
error1:
	return ret;
}
EXPORT_SYMBOL(ib_register_mad_agent);
484 
485 static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
486 {
487 	if (atomic_dec_and_test(&mad_agent_priv->refcount))
488 		complete(&mad_agent_priv->comp);
489 }
490 
491 static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
492 {
493 	struct ib_mad_port_private *port_priv;
494 
495 	/* Note that we could still be handling received MADs */
496 	trace_ib_mad_unregister_agent(mad_agent_priv);
497 
498 	/*
499 	 * Canceling all sends results in dropping received response
500 	 * MADs, preventing us from queuing additional work
501 	 */
502 	cancel_mads(mad_agent_priv);
503 	port_priv = mad_agent_priv->qp_info->port_priv;
504 	cancel_delayed_work(&mad_agent_priv->timed_work);
505 
506 	spin_lock_irq(&port_priv->reg_lock);
507 	remove_mad_reg_req(mad_agent_priv);
508 	spin_unlock_irq(&port_priv->reg_lock);
509 	xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
510 
511 	flush_workqueue(port_priv->wq);
512 	ib_cancel_rmpp_recvs(mad_agent_priv);
513 
514 	deref_mad_agent(mad_agent_priv);
515 	wait_for_completion(&mad_agent_priv->comp);
516 
517 	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
518 
519 	kfree(mad_agent_priv->reg_req);
520 	kfree_rcu(mad_agent_priv, rcu);
521 }
522 
523 /*
524  * ib_unregister_mad_agent - Unregisters a client from using MAD services
525  *
526  * Context: Process context.
527  */
528 void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
529 {
530 	struct ib_mad_agent_private *mad_agent_priv;
531 
532 	mad_agent_priv = container_of(mad_agent,
533 				      struct ib_mad_agent_private,
534 				      agent);
535 	unregister_mad_agent(mad_agent_priv);
536 }
537 EXPORT_SYMBOL(ib_unregister_mad_agent);
538 
539 static void dequeue_mad(struct ib_mad_list_head *mad_list)
540 {
541 	struct ib_mad_queue *mad_queue;
542 	unsigned long flags;
543 
544 	mad_queue = mad_list->mad_queue;
545 	spin_lock_irqsave(&mad_queue->lock, flags);
546 	list_del(&mad_list->list);
547 	mad_queue->count--;
548 	spin_unlock_irqrestore(&mad_queue->lock, flags);
549 }
550 
551 static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
552 		u16 pkey_index, u8 port_num, struct ib_wc *wc)
553 {
554 	memset(wc, 0, sizeof *wc);
555 	wc->wr_cqe = cqe;
556 	wc->status = IB_WC_SUCCESS;
557 	wc->opcode = IB_WC_RECV;
558 	wc->pkey_index = pkey_index;
559 	wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
560 	wc->src_qp = IB_QP0;
561 	wc->qp = qp;
562 	wc->slid = slid;
563 	wc->sl = 0;
564 	wc->dlid_path_bits = 0;
565 	wc->port_num = port_num;
566 }
567 
568 static size_t mad_priv_size(const struct ib_mad_private *mp)
569 {
570 	return sizeof(struct ib_mad_private) + mp->mad_size;
571 }
572 
573 static struct ib_mad_private *alloc_mad_private(size_t mad_size, gfp_t flags)
574 {
575 	size_t size = sizeof(struct ib_mad_private) + mad_size;
576 	struct ib_mad_private *ret = kzalloc(size, flags);
577 
578 	if (ret)
579 		ret->mad_size = mad_size;
580 
581 	return ret;
582 }
583 
584 static size_t port_mad_size(const struct ib_mad_port_private *port_priv)
585 {
586 	return rdma_max_mad_size(port_priv->device, port_priv->port_num);
587 }
588 
589 static size_t mad_priv_dma_size(const struct ib_mad_private *mp)
590 {
591 	return sizeof(struct ib_grh) + mp->mad_size;
592 }
593 
/*
 * Decide whether an outgoing directed-route SMP has to be posted to the
 * QP or can be processed locally, and in the latter case hand it to the
 * driver's process_mad() and queue a local completion.
 *
 * @mad_agent_priv: agent sending the SMP
 * @mad_send_wr: send work request carrying the SMP
 *
 * Return 0 if SMP is to be sent
 * Return 1 if SMP was consumed locally (whether or not solicited)
 * Return < 0 if error
 */
static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
				  struct ib_mad_send_wr_private *mad_send_wr)
{
	int ret = 0;
	struct ib_smp *smp = mad_send_wr->send_buf.mad;
	/* Same buffer viewed with the OPA SMP layout */
	struct opa_smp *opa_smp = (struct opa_smp *)smp;
	unsigned long flags;
	struct ib_mad_local_private *local;
	struct ib_mad_private *mad_priv;
	struct ib_mad_port_private *port_priv;
	struct ib_mad_agent_private *recv_mad_agent = NULL;
	struct ib_device *device = mad_agent_priv->agent.device;
	u8 port_num;
	struct ib_wc mad_wc;
	struct ib_ud_wr *send_wr = &mad_send_wr->send_wr;
	size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv);
	u16 out_mad_pkey_index = 0;
	u16 drslid;
	bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device,
				    mad_agent_priv->qp_info->port_priv->port_num);

	/*
	 * On a switch, a directed-route SMP uses the port named in the send
	 * WR; everything else uses the agent's own port.
	 */
	if (rdma_cap_ib_switch(device) &&
	    smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
		port_num = send_wr->port_num;
	else
		port_num = mad_agent_priv->agent.port_num;

	/*
	 * Directed route handling starts if the initial LID routed part of
	 * a request or the ending LID routed part of a response is empty.
	 * If we are at the start of the LID routed part, don't update the
	 * hop_ptr or hop_cnt.  See section 14.2.2, Vol 1 IB spec.
	 */
	if (opa && smp->class_version == OPA_SM_CLASS_VERSION) {
		u32 opa_drslid;

		trace_ib_mad_handle_out_opa_smi(opa_smp);

		if ((opa_get_smp_direction(opa_smp)
		     ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) ==
		     OPA_LID_PERMISSIVE &&
		     opa_smi_handle_dr_smp_send(opa_smp,
						rdma_cap_ib_switch(device),
						port_num) == IB_SMI_DISCARD) {
			ret = -EINVAL;
			dev_err(&device->dev, "OPA Invalid directed route\n");
			goto out;
		}
		/* Only the permissive LID or a value that fits 16 bits is accepted */
		opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid);
		if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) &&
		    opa_drslid & 0xffff0000) {
			ret = -EINVAL;
			dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n",
			       opa_drslid);
			goto out;
		}
		drslid = (u16)(opa_drslid & 0x0000ffff);

		/* Check to post send on QP or process locally */
		if (opa_smi_check_local_smp(opa_smp, device) == IB_SMI_DISCARD &&
		    opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD)
			goto out;
	} else {
		trace_ib_mad_handle_out_ib_smi(smp);

		if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
		     IB_LID_PERMISSIVE &&
		     smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) ==
		     IB_SMI_DISCARD) {
			ret = -EINVAL;
			dev_err(&device->dev, "Invalid directed route\n");
			goto out;
		}
		drslid = be16_to_cpu(smp->dr_slid);

		/* Check to post send on QP or process locally */
		if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD &&
		    smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD)
			goto out;
	}

	/*
	 * From here on the SMP is handled locally: allocate the bookkeeping
	 * entry and a buffer for the driver's reply (atomic allocations).
	 */
	local = kmalloc(sizeof *local, GFP_ATOMIC);
	if (!local) {
		ret = -ENOMEM;
		goto out;
	}
	local->mad_priv = NULL;
	local->recv_mad_agent = NULL;
	mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC);
	if (!mad_priv) {
		ret = -ENOMEM;
		kfree(local);
		goto out;
	}

	build_smp_wc(mad_agent_priv->agent.qp,
		     send_wr->wr.wr_cqe, drslid,
		     send_wr->pkey_index,
		     send_wr->port_num, &mad_wc);

	/* OPA MADs can be shorter than the default length set by build_smp_wc() */
	if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) {
		mad_wc.byte_len = mad_send_wr->send_buf.hdr_len
					+ mad_send_wr->send_buf.data_len
					+ sizeof(struct ib_grh);
	}

	/* No GRH for DR SMP */
	ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL,
				      (const struct ib_mad *)smp,
				      (struct ib_mad *)mad_priv->mad, &mad_size,
				      &out_mad_pkey_index);
	switch (ret)
	{
	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
		/* Driver produced a reply: deliver it to the sender if it can receive */
		if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) &&
		    mad_agent_priv->agent.recv_handler) {
			local->mad_priv = mad_priv;
			local->recv_mad_agent = mad_agent_priv;
			/*
			 * Reference MAD agent until receive
			 * side of local completion handled
			 */
			atomic_inc(&mad_agent_priv->refcount);
		} else
			kfree(mad_priv);
		break;
	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED:
		/* Nothing to deliver; only the send completion is generated */
		kfree(mad_priv);
		break;
	case IB_MAD_RESULT_SUCCESS:
		/* Treat like an incoming receive MAD */
		port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
					    mad_agent_priv->agent.port_num);
		if (port_priv) {
			memcpy(mad_priv->mad, smp, mad_priv->mad_size);
			recv_mad_agent = find_mad_agent(port_priv,
						        (const struct ib_mad_hdr *)mad_priv->mad);
		}
		if (!port_priv || !recv_mad_agent) {
			/*
			 * No receiving agent so drop packet and
			 * generate send completion.
			 */
			kfree(mad_priv);
			break;
		}
		local->mad_priv = mad_priv;
		local->recv_mad_agent = recv_mad_agent;
		break;
	default:
		/* Any other result from process_mad() is reported as -EINVAL */
		kfree(mad_priv);
		kfree(local);
		ret = -EINVAL;
		goto out;
	}

	local->mad_send_wr = mad_send_wr;
	if (opa) {
		/* Carry the values process_mad() wrote back into the completion */
		local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index;
		local->return_wc_byte_len = mad_size;
	}
	/* Reference MAD agent until send side of local completion handled */
	atomic_inc(&mad_agent_priv->refcount);
	/* Queue local completion to local list */
	spin_lock_irqsave(&mad_agent_priv->lock, flags);
	list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
	queue_work(mad_agent_priv->qp_info->port_priv->wq,
		   &mad_agent_priv->local_work);
	ret = 1;
out:
	return ret;
}
772 
/*
 * Number of pad bytes needed so that @data_len fills a whole number of
 * segments, where one segment holds (@mad_size - @hdr_len) bytes.
 *
 * With no data (or a zero segment size) the segment size itself is
 * returned; when the data already ends on a segment boundary the result
 * is 0.
 */
static int get_pad_size(int hdr_len, int data_len, size_t mad_size)
{
	int seg_size = mad_size - hdr_len;
	int tail;

	if (!data_len || !seg_size)
		return seg_size;

	/* Bytes used in the last, possibly partial, segment */
	tail = data_len % seg_size;

	return tail ? seg_size - tail : 0;
}
784 
785 static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
786 {
787 	struct ib_rmpp_segment *s, *t;
788 
789 	list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
790 		list_del(&s->list);
791 		kfree(s);
792 	}
793 }
794 
795 static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
796 				size_t mad_size, gfp_t gfp_mask)
797 {
798 	struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
799 	struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
800 	struct ib_rmpp_segment *seg = NULL;
801 	int left, seg_size, pad;
802 
803 	send_buf->seg_size = mad_size - send_buf->hdr_len;
804 	send_buf->seg_rmpp_size = mad_size - IB_MGMT_RMPP_HDR;
805 	seg_size = send_buf->seg_size;
806 	pad = send_wr->pad;
807 
808 	/* Allocate data segments. */
809 	for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
810 		seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
811 		if (!seg) {
812 			free_send_rmpp_list(send_wr);
813 			return -ENOMEM;
814 		}
815 		seg->num = ++send_buf->seg_count;
816 		list_add_tail(&seg->list, &send_wr->rmpp_list);
817 	}
818 
819 	/* Zero any padding */
820 	if (pad)
821 		memset(seg->data + seg_size - pad, 0, pad);
822 
823 	rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv->
824 					  agent.rmpp_version;
825 	rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
826 	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
827 
828 	send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
829 					struct ib_rmpp_segment, list);
830 	send_wr->last_ack_seg = send_wr->cur_seg;
831 	return 0;
832 }
833 
834 int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent)
835 {
836 	return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP);
837 }
838 EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
839 
840 struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
841 					    u32 remote_qpn, u16 pkey_index,
842 					    int rmpp_active,
843 					    int hdr_len, int data_len,
844 					    gfp_t gfp_mask,
845 					    u8 base_version)
846 {
847 	struct ib_mad_agent_private *mad_agent_priv;
848 	struct ib_mad_send_wr_private *mad_send_wr;
849 	int pad, message_size, ret, size;
850 	void *buf;
851 	size_t mad_size;
852 	bool opa;
853 
854 	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
855 				      agent);
856 
857 	opa = rdma_cap_opa_mad(mad_agent->device, mad_agent->port_num);
858 
859 	if (opa && base_version == OPA_MGMT_BASE_VERSION)
860 		mad_size = sizeof(struct opa_mad);
861 	else
862 		mad_size = sizeof(struct ib_mad);
863 
864 	pad = get_pad_size(hdr_len, data_len, mad_size);
865 	message_size = hdr_len + data_len + pad;
866 
867 	if (ib_mad_kernel_rmpp_agent(mad_agent)) {
868 		if (!rmpp_active && message_size > mad_size)
869 			return ERR_PTR(-EINVAL);
870 	} else
871 		if (rmpp_active || message_size > mad_size)
872 			return ERR_PTR(-EINVAL);
873 
874 	size = rmpp_active ? hdr_len : mad_size;
875 	buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
876 	if (!buf)
877 		return ERR_PTR(-ENOMEM);
878 
879 	mad_send_wr = buf + size;
880 	INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
881 	mad_send_wr->send_buf.mad = buf;
882 	mad_send_wr->send_buf.hdr_len = hdr_len;
883 	mad_send_wr->send_buf.data_len = data_len;
884 	mad_send_wr->pad = pad;
885 
886 	mad_send_wr->mad_agent_priv = mad_agent_priv;
887 	mad_send_wr->sg_list[0].length = hdr_len;
888 	mad_send_wr->sg_list[0].lkey = mad_agent->qp->pd->local_dma_lkey;
889 
890 	/* OPA MADs don't have to be the full 2048 bytes */
891 	if (opa && base_version == OPA_MGMT_BASE_VERSION &&
892 	    data_len < mad_size - hdr_len)
893 		mad_send_wr->sg_list[1].length = data_len;
894 	else
895 		mad_send_wr->sg_list[1].length = mad_size - hdr_len;
896 
897 	mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
898 
899 	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
900 
901 	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
902 	mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
903 	mad_send_wr->send_wr.wr.num_sge = 2;
904 	mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
905 	mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED;
906 	mad_send_wr->send_wr.remote_qpn = remote_qpn;
907 	mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY;
908 	mad_send_wr->send_wr.pkey_index = pkey_index;
909 
910 	if (rmpp_active) {
911 		ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask);
912 		if (ret) {
913 			kfree(buf);
914 			return ERR_PTR(ret);
915 		}
916 	}
917 
918 	mad_send_wr->send_buf.mad_agent = mad_agent;
919 	atomic_inc(&mad_agent_priv->refcount);
920 	return &mad_send_wr->send_buf;
921 }
922 EXPORT_SYMBOL(ib_create_send_mad);
923 
924 int ib_get_mad_data_offset(u8 mgmt_class)
925 {
926 	if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
927 		return IB_MGMT_SA_HDR;
928 	else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
929 		 (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
930 		 (mgmt_class == IB_MGMT_CLASS_BIS))
931 		return IB_MGMT_DEVICE_HDR;
932 	else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
933 		 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
934 		return IB_MGMT_VENDOR_HDR;
935 	else
936 		return IB_MGMT_MAD_HDR;
937 }
938 EXPORT_SYMBOL(ib_get_mad_data_offset);
939 
940 int ib_is_mad_class_rmpp(u8 mgmt_class)
941 {
942 	if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) ||
943 	    (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
944 	    (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
945 	    (mgmt_class == IB_MGMT_CLASS_BIS) ||
946 	    ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
947 	     (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)))
948 		return 1;
949 	return 0;
950 }
951 EXPORT_SYMBOL(ib_is_mad_class_rmpp);
952 
/*
 * ib_get_rmpp_segment - return the data buffer of an RMPP segment
 * @send_buf: send buffer embedded in an ib_mad_send_wr_private
 * @seg_num: number of the wanted segment
 *
 * Moves the work request's cur_seg cursor to the segment numbered
 * @seg_num and returns that segment's data.  The search walks forward
 * from the current segment when @seg_num is larger than the current
 * segment's number and backward when it is smaller; if it already
 * matches, no walk is done.
 *
 * NOTE(review): cur_seg itself is the loop iterator, and the walk is
 * anchored at the current segment's list node rather than at the list
 * head.  Callers are presumably expected to pass a segment number that
 * exists in the list - confirm before relying on other inputs.
 */
void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
{
	struct ib_mad_send_wr_private *mad_send_wr;
	struct list_head *list;

	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
				   send_buf);
	/* Anchor of the walk: the list node of the segment we are on now */
	list = &mad_send_wr->cur_seg->list;

	if (mad_send_wr->cur_seg->num < seg_num) {
		/* Wanted segment has a higher number: walk forward */
		list_for_each_entry(mad_send_wr->cur_seg, list, list)
			if (mad_send_wr->cur_seg->num == seg_num)
				break;
	} else if (mad_send_wr->cur_seg->num > seg_num) {
		/* Wanted segment has a lower number: walk backward */
		list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
			if (mad_send_wr->cur_seg->num == seg_num)
				break;
	}
	return mad_send_wr->cur_seg->data;
}
EXPORT_SYMBOL(ib_get_rmpp_segment);
974 
975 static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
976 {
977 	if (mad_send_wr->send_buf.seg_count)
978 		return ib_get_rmpp_segment(&mad_send_wr->send_buf,
979 					   mad_send_wr->seg_num);
980 	else
981 		return mad_send_wr->send_buf.mad +
982 		       mad_send_wr->send_buf.hdr_len;
983 }
984 
985 void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
986 {
987 	struct ib_mad_agent_private *mad_agent_priv;
988 	struct ib_mad_send_wr_private *mad_send_wr;
989 
990 	mad_agent_priv = container_of(send_buf->mad_agent,
991 				      struct ib_mad_agent_private, agent);
992 	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
993 				   send_buf);
994 
995 	free_send_rmpp_list(mad_send_wr);
996 	kfree(send_buf->mad);
997 	deref_mad_agent(mad_agent_priv);
998 }
999 EXPORT_SYMBOL(ib_free_send_mad);
1000 
1001 int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
1002 {
1003 	struct ib_mad_qp_info *qp_info;
1004 	struct list_head *list;
1005 	struct ib_mad_agent *mad_agent;
1006 	struct ib_sge *sge;
1007 	unsigned long flags;
1008 	int ret;
1009 
1010 	/* Set WR ID to find mad_send_wr upon completion */
1011 	qp_info = mad_send_wr->mad_agent_priv->qp_info;
1012 	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
1013 	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
1014 	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
1015 
1016 	mad_agent = mad_send_wr->send_buf.mad_agent;
1017 	sge = mad_send_wr->sg_list;
1018 	sge[0].addr = ib_dma_map_single(mad_agent->device,
1019 					mad_send_wr->send_buf.mad,
1020 					sge[0].length,
1021 					DMA_TO_DEVICE);
1022 	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
1023 		return -ENOMEM;
1024 
1025 	mad_send_wr->header_mapping = sge[0].addr;
1026 
1027 	sge[1].addr = ib_dma_map_single(mad_agent->device,
1028 					ib_get_payload(mad_send_wr),
1029 					sge[1].length,
1030 					DMA_TO_DEVICE);
1031 	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
1032 		ib_dma_unmap_single(mad_agent->device,
1033 				    mad_send_wr->header_mapping,
1034 				    sge[0].length, DMA_TO_DEVICE);
1035 		return -ENOMEM;
1036 	}
1037 	mad_send_wr->payload_mapping = sge[1].addr;
1038 
1039 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
1040 	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
1041 		trace_ib_mad_ib_send_mad(mad_send_wr, qp_info);
1042 		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
1043 				   NULL);
1044 		list = &qp_info->send_queue.list;
1045 	} else {
1046 		ret = 0;
1047 		list = &qp_info->overflow_list;
1048 	}
1049 
1050 	if (!ret) {
1051 		qp_info->send_queue.count++;
1052 		list_add_tail(&mad_send_wr->mad_list.list, list);
1053 	}
1054 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
1055 	if (ret) {
1056 		ib_dma_unmap_single(mad_agent->device,
1057 				    mad_send_wr->header_mapping,
1058 				    sge[0].length, DMA_TO_DEVICE);
1059 		ib_dma_unmap_single(mad_agent->device,
1060 				    mad_send_wr->payload_mapping,
1061 				    sge[1].length, DMA_TO_DEVICE);
1062 	}
1063 	return ret;
1064 }
1065 
1066 /*
1067  * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
1068  *  with the registered client
1069  */
1070 int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
1071 		     struct ib_mad_send_buf **bad_send_buf)
1072 {
1073 	struct ib_mad_agent_private *mad_agent_priv;
1074 	struct ib_mad_send_buf *next_send_buf;
1075 	struct ib_mad_send_wr_private *mad_send_wr;
1076 	unsigned long flags;
1077 	int ret = -EINVAL;
1078 
1079 	/* Walk list of send WRs and post each on send list */
1080 	for (; send_buf; send_buf = next_send_buf) {
1081 		mad_send_wr = container_of(send_buf,
1082 					   struct ib_mad_send_wr_private,
1083 					   send_buf);
1084 		mad_agent_priv = mad_send_wr->mad_agent_priv;
1085 
1086 		ret = ib_mad_enforce_security(mad_agent_priv,
1087 					      mad_send_wr->send_wr.pkey_index);
1088 		if (ret)
1089 			goto error;
1090 
1091 		if (!send_buf->mad_agent->send_handler ||
1092 		    (send_buf->timeout_ms &&
1093 		     !send_buf->mad_agent->recv_handler)) {
1094 			ret = -EINVAL;
1095 			goto error;
1096 		}
1097 
1098 		if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) {
1099 			if (mad_agent_priv->agent.rmpp_version) {
1100 				ret = -EINVAL;
1101 				goto error;
1102 			}
1103 		}
1104 
1105 		/*
1106 		 * Save pointer to next work request to post in case the
1107 		 * current one completes, and the user modifies the work
1108 		 * request associated with the completion
1109 		 */
1110 		next_send_buf = send_buf->next;
1111 		mad_send_wr->send_wr.ah = send_buf->ah;
1112 
1113 		if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
1114 		    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
1115 			ret = handle_outgoing_dr_smp(mad_agent_priv,
1116 						     mad_send_wr);
1117 			if (ret < 0)		/* error */
1118 				goto error;
1119 			else if (ret == 1)	/* locally consumed */
1120 				continue;
1121 		}
1122 
1123 		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
1124 		/* Timeout will be updated after send completes */
1125 		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
1126 		mad_send_wr->max_retries = send_buf->retries;
1127 		mad_send_wr->retries_left = send_buf->retries;
1128 		send_buf->retries = 0;
1129 		/* Reference for work request to QP + response */
1130 		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
1131 		mad_send_wr->status = IB_WC_SUCCESS;
1132 
1133 		/* Reference MAD agent until send completes */
1134 		atomic_inc(&mad_agent_priv->refcount);
1135 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
1136 		list_add_tail(&mad_send_wr->agent_list,
1137 			      &mad_agent_priv->send_list);
1138 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1139 
1140 		if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
1141 			ret = ib_send_rmpp_mad(mad_send_wr);
1142 			if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
1143 				ret = ib_send_mad(mad_send_wr);
1144 		} else
1145 			ret = ib_send_mad(mad_send_wr);
1146 		if (ret < 0) {
1147 			/* Fail send request */
1148 			spin_lock_irqsave(&mad_agent_priv->lock, flags);
1149 			list_del(&mad_send_wr->agent_list);
1150 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1151 			atomic_dec(&mad_agent_priv->refcount);
1152 			goto error;
1153 		}
1154 	}
1155 	return 0;
1156 error:
1157 	if (bad_send_buf)
1158 		*bad_send_buf = send_buf;
1159 	return ret;
1160 }
1161 EXPORT_SYMBOL(ib_post_send_mad);
1162 
1163 /*
1164  * ib_free_recv_mad - Returns data buffers used to receive
1165  *  a MAD to the access layer
1166  */
1167 void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
1168 {
1169 	struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf;
1170 	struct ib_mad_private_header *mad_priv_hdr;
1171 	struct ib_mad_private *priv;
1172 	struct list_head free_list;
1173 
1174 	INIT_LIST_HEAD(&free_list);
1175 	list_splice_init(&mad_recv_wc->rmpp_list, &free_list);
1176 
1177 	list_for_each_entry_safe(mad_recv_buf, temp_recv_buf,
1178 					&free_list, list) {
1179 		mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc,
1180 					   recv_buf);
1181 		mad_priv_hdr = container_of(mad_recv_wc,
1182 					    struct ib_mad_private_header,
1183 					    recv_wc);
1184 		priv = container_of(mad_priv_hdr, struct ib_mad_private,
1185 				    header);
1186 		kfree(priv);
1187 	}
1188 }
1189 EXPORT_SYMBOL(ib_free_recv_mad);
1190 
1191 static int method_in_use(struct ib_mad_mgmt_method_table **method,
1192 			 struct ib_mad_reg_req *mad_reg_req)
1193 {
1194 	int i;
1195 
1196 	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
1197 		if ((*method)->agent[i]) {
1198 			pr_err("Method %d already in use\n", i);
1199 			return -EINVAL;
1200 		}
1201 	}
1202 	return 0;
1203 }
1204 
1205 static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
1206 {
1207 	/* Allocate management method table */
1208 	*method = kzalloc(sizeof **method, GFP_ATOMIC);
1209 	return (*method) ? 0 : (-ENOMEM);
1210 }
1211 
1212 /*
1213  * Check to see if there are any methods still in use
1214  */
1215 static int check_method_table(struct ib_mad_mgmt_method_table *method)
1216 {
1217 	int i;
1218 
1219 	for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
1220 		if (method->agent[i])
1221 			return 1;
1222 	return 0;
1223 }
1224 
1225 /*
1226  * Check to see if there are any method tables for this class still in use
1227  */
1228 static int check_class_table(struct ib_mad_mgmt_class_table *class)
1229 {
1230 	int i;
1231 
1232 	for (i = 0; i < MAX_MGMT_CLASS; i++)
1233 		if (class->method_table[i])
1234 			return 1;
1235 	return 0;
1236 }
1237 
1238 static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class)
1239 {
1240 	int i;
1241 
1242 	for (i = 0; i < MAX_MGMT_OUI; i++)
1243 		if (vendor_class->method_table[i])
1244 			return 1;
1245 	return 0;
1246 }
1247 
1248 static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
1249 			   const char *oui)
1250 {
1251 	int i;
1252 
1253 	for (i = 0; i < MAX_MGMT_OUI; i++)
1254 		/* Is there matching OUI for this vendor class ? */
1255 		if (!memcmp(vendor_class->oui[i], oui, 3))
1256 			return i;
1257 
1258 	return -1;
1259 }
1260 
1261 static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor)
1262 {
1263 	int i;
1264 
1265 	for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++)
1266 		if (vendor->vendor_class[i])
1267 			return 1;
1268 
1269 	return 0;
1270 }
1271 
1272 static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
1273 				     struct ib_mad_agent_private *agent)
1274 {
1275 	int i;
1276 
1277 	/* Remove any methods for this mad agent */
1278 	for (i = 0; i < IB_MGMT_MAX_METHODS; i++) {
1279 		if (method->agent[i] == agent) {
1280 			method->agent[i] = NULL;
1281 		}
1282 	}
1283 }
1284 
1285 static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1286 			      struct ib_mad_agent_private *agent_priv,
1287 			      u8 mgmt_class)
1288 {
1289 	struct ib_mad_port_private *port_priv;
1290 	struct ib_mad_mgmt_class_table **class;
1291 	struct ib_mad_mgmt_method_table **method;
1292 	int i, ret;
1293 
1294 	port_priv = agent_priv->qp_info->port_priv;
1295 	class = &port_priv->version[mad_reg_req->mgmt_class_version].class;
1296 	if (!*class) {
1297 		/* Allocate management class table for "new" class version */
1298 		*class = kzalloc(sizeof **class, GFP_ATOMIC);
1299 		if (!*class) {
1300 			ret = -ENOMEM;
1301 			goto error1;
1302 		}
1303 
1304 		/* Allocate method table for this management class */
1305 		method = &(*class)->method_table[mgmt_class];
1306 		if ((ret = allocate_method_table(method)))
1307 			goto error2;
1308 	} else {
1309 		method = &(*class)->method_table[mgmt_class];
1310 		if (!*method) {
1311 			/* Allocate method table for this management class */
1312 			if ((ret = allocate_method_table(method)))
1313 				goto error1;
1314 		}
1315 	}
1316 
1317 	/* Now, make sure methods are not already in use */
1318 	if (method_in_use(method, mad_reg_req))
1319 		goto error3;
1320 
1321 	/* Finally, add in methods being registered */
1322 	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
1323 		(*method)->agent[i] = agent_priv;
1324 
1325 	return 0;
1326 
1327 error3:
1328 	/* Remove any methods for this mad agent */
1329 	remove_methods_mad_agent(*method, agent_priv);
1330 	/* Now, check to see if there are any methods in use */
1331 	if (!check_method_table(*method)) {
1332 		/* If not, release management method table */
1333 		kfree(*method);
1334 		*method = NULL;
1335 	}
1336 	ret = -EINVAL;
1337 	goto error1;
1338 error2:
1339 	kfree(*class);
1340 	*class = NULL;
1341 error1:
1342 	return ret;
1343 }
1344 
1345 static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1346 			   struct ib_mad_agent_private *agent_priv)
1347 {
1348 	struct ib_mad_port_private *port_priv;
1349 	struct ib_mad_mgmt_vendor_class_table **vendor_table;
1350 	struct ib_mad_mgmt_vendor_class_table *vendor = NULL;
1351 	struct ib_mad_mgmt_vendor_class *vendor_class = NULL;
1352 	struct ib_mad_mgmt_method_table **method;
1353 	int i, ret = -ENOMEM;
1354 	u8 vclass;
1355 
1356 	/* "New" vendor (with OUI) class */
1357 	vclass = vendor_class_index(mad_reg_req->mgmt_class);
1358 	port_priv = agent_priv->qp_info->port_priv;
1359 	vendor_table = &port_priv->version[
1360 				mad_reg_req->mgmt_class_version].vendor;
1361 	if (!*vendor_table) {
1362 		/* Allocate mgmt vendor class table for "new" class version */
1363 		vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
1364 		if (!vendor)
1365 			goto error1;
1366 
1367 		*vendor_table = vendor;
1368 	}
1369 	if (!(*vendor_table)->vendor_class[vclass]) {
1370 		/* Allocate table for this management vendor class */
1371 		vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
1372 		if (!vendor_class)
1373 			goto error2;
1374 
1375 		(*vendor_table)->vendor_class[vclass] = vendor_class;
1376 	}
1377 	for (i = 0; i < MAX_MGMT_OUI; i++) {
1378 		/* Is there matching OUI for this vendor class ? */
1379 		if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i],
1380 			    mad_reg_req->oui, 3)) {
1381 			method = &(*vendor_table)->vendor_class[
1382 						vclass]->method_table[i];
1383 			if (!*method)
1384 				goto error3;
1385 			goto check_in_use;
1386 		}
1387 	}
1388 	for (i = 0; i < MAX_MGMT_OUI; i++) {
1389 		/* OUI slot available ? */
1390 		if (!is_vendor_oui((*vendor_table)->vendor_class[
1391 				vclass]->oui[i])) {
1392 			method = &(*vendor_table)->vendor_class[
1393 				vclass]->method_table[i];
1394 			/* Allocate method table for this OUI */
1395 			if (!*method) {
1396 				ret = allocate_method_table(method);
1397 				if (ret)
1398 					goto error3;
1399 			}
1400 			memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
1401 			       mad_reg_req->oui, 3);
1402 			goto check_in_use;
1403 		}
1404 	}
1405 	dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n");
1406 	goto error3;
1407 
1408 check_in_use:
1409 	/* Now, make sure methods are not already in use */
1410 	if (method_in_use(method, mad_reg_req))
1411 		goto error4;
1412 
1413 	/* Finally, add in methods being registered */
1414 	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
1415 		(*method)->agent[i] = agent_priv;
1416 
1417 	return 0;
1418 
1419 error4:
1420 	/* Remove any methods for this mad agent */
1421 	remove_methods_mad_agent(*method, agent_priv);
1422 	/* Now, check to see if there are any methods in use */
1423 	if (!check_method_table(*method)) {
1424 		/* If not, release management method table */
1425 		kfree(*method);
1426 		*method = NULL;
1427 	}
1428 	ret = -EINVAL;
1429 error3:
1430 	if (vendor_class) {
1431 		(*vendor_table)->vendor_class[vclass] = NULL;
1432 		kfree(vendor_class);
1433 	}
1434 error2:
1435 	if (vendor) {
1436 		*vendor_table = NULL;
1437 		kfree(vendor);
1438 	}
1439 error1:
1440 	return ret;
1441 }
1442 
1443 static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
1444 {
1445 	struct ib_mad_port_private *port_priv;
1446 	struct ib_mad_mgmt_class_table *class;
1447 	struct ib_mad_mgmt_method_table *method;
1448 	struct ib_mad_mgmt_vendor_class_table *vendor;
1449 	struct ib_mad_mgmt_vendor_class *vendor_class;
1450 	int index;
1451 	u8 mgmt_class;
1452 
1453 	/*
1454 	 * Was MAD registration request supplied
1455 	 * with original registration ?
1456 	 */
1457 	if (!agent_priv->reg_req) {
1458 		goto out;
1459 	}
1460 
1461 	port_priv = agent_priv->qp_info->port_priv;
1462 	mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
1463 	class = port_priv->version[
1464 			agent_priv->reg_req->mgmt_class_version].class;
1465 	if (!class)
1466 		goto vendor_check;
1467 
1468 	method = class->method_table[mgmt_class];
1469 	if (method) {
1470 		/* Remove any methods for this mad agent */
1471 		remove_methods_mad_agent(method, agent_priv);
1472 		/* Now, check to see if there are any methods still in use */
1473 		if (!check_method_table(method)) {
1474 			/* If not, release management method table */
1475 			kfree(method);
1476 			class->method_table[mgmt_class] = NULL;
1477 			/* Any management classes left ? */
1478 			if (!check_class_table(class)) {
1479 				/* If not, release management class table */
1480 				kfree(class);
1481 				port_priv->version[
1482 					agent_priv->reg_req->
1483 					mgmt_class_version].class = NULL;
1484 			}
1485 		}
1486 	}
1487 
1488 vendor_check:
1489 	if (!is_vendor_class(mgmt_class))
1490 		goto out;
1491 
1492 	/* normalize mgmt_class to vendor range 2 */
1493 	mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class);
1494 	vendor = port_priv->version[
1495 			agent_priv->reg_req->mgmt_class_version].vendor;
1496 
1497 	if (!vendor)
1498 		goto out;
1499 
1500 	vendor_class = vendor->vendor_class[mgmt_class];
1501 	if (vendor_class) {
1502 		index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui);
1503 		if (index < 0)
1504 			goto out;
1505 		method = vendor_class->method_table[index];
1506 		if (method) {
1507 			/* Remove any methods for this mad agent */
1508 			remove_methods_mad_agent(method, agent_priv);
1509 			/*
1510 			 * Now, check to see if there are
1511 			 * any methods still in use
1512 			 */
1513 			if (!check_method_table(method)) {
1514 				/* If not, release management method table */
1515 				kfree(method);
1516 				vendor_class->method_table[index] = NULL;
1517 				memset(vendor_class->oui[index], 0, 3);
1518 				/* Any OUIs left ? */
1519 				if (!check_vendor_class(vendor_class)) {
1520 					/* If not, release vendor class table */
1521 					kfree(vendor_class);
1522 					vendor->vendor_class[mgmt_class] = NULL;
1523 					/* Any other vendor classes left ? */
1524 					if (!check_vendor_table(vendor)) {
1525 						kfree(vendor);
1526 						port_priv->version[
1527 							agent_priv->reg_req->
1528 							mgmt_class_version].
1529 							vendor = NULL;
1530 					}
1531 				}
1532 			}
1533 		}
1534 	}
1535 
1536 out:
1537 	return;
1538 }
1539 
1540 static struct ib_mad_agent_private *
1541 find_mad_agent(struct ib_mad_port_private *port_priv,
1542 	       const struct ib_mad_hdr *mad_hdr)
1543 {
1544 	struct ib_mad_agent_private *mad_agent = NULL;
1545 	unsigned long flags;
1546 
1547 	if (ib_response_mad(mad_hdr)) {
1548 		u32 hi_tid;
1549 
1550 		/*
1551 		 * Routing is based on high 32 bits of transaction ID
1552 		 * of MAD.
1553 		 */
1554 		hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
1555 		rcu_read_lock();
1556 		mad_agent = xa_load(&ib_mad_clients, hi_tid);
1557 		if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount))
1558 			mad_agent = NULL;
1559 		rcu_read_unlock();
1560 	} else {
1561 		struct ib_mad_mgmt_class_table *class;
1562 		struct ib_mad_mgmt_method_table *method;
1563 		struct ib_mad_mgmt_vendor_class_table *vendor;
1564 		struct ib_mad_mgmt_vendor_class *vendor_class;
1565 		const struct ib_vendor_mad *vendor_mad;
1566 		int index;
1567 
1568 		spin_lock_irqsave(&port_priv->reg_lock, flags);
1569 		/*
1570 		 * Routing is based on version, class, and method
1571 		 * For "newer" vendor MADs, also based on OUI
1572 		 */
1573 		if (mad_hdr->class_version >= MAX_MGMT_VERSION)
1574 			goto out;
1575 		if (!is_vendor_class(mad_hdr->mgmt_class)) {
1576 			class = port_priv->version[
1577 					mad_hdr->class_version].class;
1578 			if (!class)
1579 				goto out;
1580 			if (convert_mgmt_class(mad_hdr->mgmt_class) >=
1581 			    ARRAY_SIZE(class->method_table))
1582 				goto out;
1583 			method = class->method_table[convert_mgmt_class(
1584 							mad_hdr->mgmt_class)];
1585 			if (method)
1586 				mad_agent = method->agent[mad_hdr->method &
1587 							  ~IB_MGMT_METHOD_RESP];
1588 		} else {
1589 			vendor = port_priv->version[
1590 					mad_hdr->class_version].vendor;
1591 			if (!vendor)
1592 				goto out;
1593 			vendor_class = vendor->vendor_class[vendor_class_index(
1594 						mad_hdr->mgmt_class)];
1595 			if (!vendor_class)
1596 				goto out;
1597 			/* Find matching OUI */
1598 			vendor_mad = (const struct ib_vendor_mad *)mad_hdr;
1599 			index = find_vendor_oui(vendor_class, vendor_mad->oui);
1600 			if (index == -1)
1601 				goto out;
1602 			method = vendor_class->method_table[index];
1603 			if (method) {
1604 				mad_agent = method->agent[mad_hdr->method &
1605 							  ~IB_MGMT_METHOD_RESP];
1606 			}
1607 		}
1608 		if (mad_agent)
1609 			atomic_inc(&mad_agent->refcount);
1610 out:
1611 		spin_unlock_irqrestore(&port_priv->reg_lock, flags);
1612 	}
1613 
1614 	if (mad_agent && !mad_agent->agent.recv_handler) {
1615 		dev_notice(&port_priv->device->dev,
1616 			   "No receive handler for client %p on port %d\n",
1617 			   &mad_agent->agent, port_priv->port_num);
1618 		deref_mad_agent(mad_agent);
1619 		mad_agent = NULL;
1620 	}
1621 
1622 	return mad_agent;
1623 }
1624 
1625 static int validate_mad(const struct ib_mad_hdr *mad_hdr,
1626 			const struct ib_mad_qp_info *qp_info,
1627 			bool opa)
1628 {
1629 	int valid = 0;
1630 	u32 qp_num = qp_info->qp->qp_num;
1631 
1632 	/* Make sure MAD base version is understood */
1633 	if (mad_hdr->base_version != IB_MGMT_BASE_VERSION &&
1634 	    (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) {
1635 		pr_err("MAD received with unsupported base version %d %s\n",
1636 		       mad_hdr->base_version, opa ? "(opa)" : "");
1637 		goto out;
1638 	}
1639 
1640 	/* Filter SMI packets sent to other than QP0 */
1641 	if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
1642 	    (mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
1643 		if (qp_num == 0)
1644 			valid = 1;
1645 	} else {
1646 		/* CM attributes other than ClassPortInfo only use Send method */
1647 		if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) &&
1648 		    (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) &&
1649 		    (mad_hdr->method != IB_MGMT_METHOD_SEND))
1650 			goto out;
1651 		/* Filter GSI packets sent to QP0 */
1652 		if (qp_num != 0)
1653 			valid = 1;
1654 	}
1655 
1656 out:
1657 	return valid;
1658 }
1659 
1660 static int is_rmpp_data_mad(const struct ib_mad_agent_private *mad_agent_priv,
1661 			    const struct ib_mad_hdr *mad_hdr)
1662 {
1663 	struct ib_rmpp_mad *rmpp_mad;
1664 
1665 	rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
1666 	return !mad_agent_priv->agent.rmpp_version ||
1667 		!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) ||
1668 		!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
1669 				    IB_MGMT_RMPP_FLAG_ACTIVE) ||
1670 		(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
1671 }
1672 
1673 static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr,
1674 				     const struct ib_mad_recv_wc *rwc)
1675 {
1676 	return ((struct ib_mad_hdr *)(wr->send_buf.mad))->mgmt_class ==
1677 		rwc->recv_buf.mad->mad_hdr.mgmt_class;
1678 }
1679 
1680 static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv,
1681 				   const struct ib_mad_send_wr_private *wr,
1682 				   const struct ib_mad_recv_wc *rwc )
1683 {
1684 	struct rdma_ah_attr attr;
1685 	u8 send_resp, rcv_resp;
1686 	union ib_gid sgid;
1687 	struct ib_device *device = mad_agent_priv->agent.device;
1688 	u8 port_num = mad_agent_priv->agent.port_num;
1689 	u8 lmc;
1690 	bool has_grh;
1691 
1692 	send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad);
1693 	rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr);
1694 
1695 	if (send_resp == rcv_resp)
1696 		/* both requests, or both responses. GIDs different */
1697 		return 0;
1698 
1699 	if (rdma_query_ah(wr->send_buf.ah, &attr))
1700 		/* Assume not equal, to avoid false positives. */
1701 		return 0;
1702 
1703 	has_grh = !!(rdma_ah_get_ah_flags(&attr) & IB_AH_GRH);
1704 	if (has_grh != !!(rwc->wc->wc_flags & IB_WC_GRH))
1705 		/* one has GID, other does not.  Assume different */
1706 		return 0;
1707 
1708 	if (!send_resp && rcv_resp) {
1709 		/* is request/response. */
1710 		if (!has_grh) {
1711 			if (ib_get_cached_lmc(device, port_num, &lmc))
1712 				return 0;
1713 			return (!lmc || !((rdma_ah_get_path_bits(&attr) ^
1714 					   rwc->wc->dlid_path_bits) &
1715 					  ((1 << lmc) - 1)));
1716 		} else {
1717 			const struct ib_global_route *grh =
1718 					rdma_ah_read_grh(&attr);
1719 
1720 			if (rdma_query_gid(device, port_num,
1721 					   grh->sgid_index, &sgid))
1722 				return 0;
1723 			return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
1724 				       16);
1725 		}
1726 	}
1727 
1728 	if (!has_grh)
1729 		return rdma_ah_get_dlid(&attr) == rwc->wc->slid;
1730 	else
1731 		return !memcmp(rdma_ah_read_grh(&attr)->dgid.raw,
1732 			       rwc->recv_buf.grh->sgid.raw,
1733 			       16);
1734 }
1735 
1736 static inline int is_direct(u8 class)
1737 {
1738 	return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE);
1739 }
1740 
1741 struct ib_mad_send_wr_private*
1742 ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv,
1743 		 const struct ib_mad_recv_wc *wc)
1744 {
1745 	struct ib_mad_send_wr_private *wr;
1746 	const struct ib_mad_hdr *mad_hdr;
1747 
1748 	mad_hdr = &wc->recv_buf.mad->mad_hdr;
1749 
1750 	list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) {
1751 		if ((wr->tid == mad_hdr->tid) &&
1752 		    rcv_has_same_class(wr, wc) &&
1753 		    /*
1754 		     * Don't check GID for direct routed MADs.
1755 		     * These might have permissive LIDs.
1756 		     */
1757 		    (is_direct(mad_hdr->mgmt_class) ||
1758 		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
1759 			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1760 	}
1761 
1762 	/*
1763 	 * It's possible to receive the response before we've
1764 	 * been notified that the send has completed
1765 	 */
1766 	list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) {
1767 		if (is_rmpp_data_mad(mad_agent_priv, wr->send_buf.mad) &&
1768 		    wr->tid == mad_hdr->tid &&
1769 		    wr->timeout &&
1770 		    rcv_has_same_class(wr, wc) &&
1771 		    /*
1772 		     * Don't check GID for direct routed MADs.
1773 		     * These might have permissive LIDs.
1774 		     */
1775 		    (is_direct(mad_hdr->mgmt_class) ||
1776 		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
1777 			/* Verify request has not been canceled */
1778 			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1779 	}
1780 	return NULL;
1781 }
1782 
1783 void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
1784 {
1785 	mad_send_wr->timeout = 0;
1786 	if (mad_send_wr->refcount == 1)
1787 		list_move_tail(&mad_send_wr->agent_list,
1788 			      &mad_send_wr->mad_agent_priv->done_list);
1789 }
1790 
1791 static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1792 				 struct ib_mad_recv_wc *mad_recv_wc)
1793 {
1794 	struct ib_mad_send_wr_private *mad_send_wr;
1795 	struct ib_mad_send_wc mad_send_wc;
1796 	unsigned long flags;
1797 	int ret;
1798 
1799 	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
1800 	ret = ib_mad_enforce_security(mad_agent_priv,
1801 				      mad_recv_wc->wc->pkey_index);
1802 	if (ret) {
1803 		ib_free_recv_mad(mad_recv_wc);
1804 		deref_mad_agent(mad_agent_priv);
1805 		return;
1806 	}
1807 
1808 	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
1809 	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
1810 		mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
1811 						      mad_recv_wc);
1812 		if (!mad_recv_wc) {
1813 			deref_mad_agent(mad_agent_priv);
1814 			return;
1815 		}
1816 	}
1817 
1818 	/* Complete corresponding request */
1819 	if (ib_response_mad(&mad_recv_wc->recv_buf.mad->mad_hdr)) {
1820 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
1821 		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
1822 		if (!mad_send_wr) {
1823 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1824 			if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)
1825 			   && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class)
1826 			   && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr)
1827 					& IB_MGMT_RMPP_FLAG_ACTIVE)) {
1828 				/* user rmpp is in effect
1829 				 * and this is an active RMPP MAD
1830 				 */
1831 				mad_agent_priv->agent.recv_handler(
1832 						&mad_agent_priv->agent, NULL,
1833 						mad_recv_wc);
1834 				atomic_dec(&mad_agent_priv->refcount);
1835 			} else {
1836 				/* not user rmpp, revert to normal behavior and
1837 				 * drop the mad */
1838 				ib_free_recv_mad(mad_recv_wc);
1839 				deref_mad_agent(mad_agent_priv);
1840 				return;
1841 			}
1842 		} else {
1843 			ib_mark_mad_done(mad_send_wr);
1844 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1845 
1846 			/* Defined behavior is to complete response before request */
1847 			mad_agent_priv->agent.recv_handler(
1848 					&mad_agent_priv->agent,
1849 					&mad_send_wr->send_buf,
1850 					mad_recv_wc);
1851 			atomic_dec(&mad_agent_priv->refcount);
1852 
1853 			mad_send_wc.status = IB_WC_SUCCESS;
1854 			mad_send_wc.vendor_err = 0;
1855 			mad_send_wc.send_buf = &mad_send_wr->send_buf;
1856 			ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
1857 		}
1858 	} else {
1859 		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
1860 						   mad_recv_wc);
1861 		deref_mad_agent(mad_agent_priv);
1862 	}
1863 
1864 	return;
1865 }
1866 
1867 static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv,
1868 				     const struct ib_mad_qp_info *qp_info,
1869 				     const struct ib_wc *wc,
1870 				     int port_num,
1871 				     struct ib_mad_private *recv,
1872 				     struct ib_mad_private *response)
1873 {
1874 	enum smi_forward_action retsmi;
1875 	struct ib_smp *smp = (struct ib_smp *)recv->mad;
1876 
1877 	trace_ib_mad_handle_ib_smi(smp);
1878 
1879 	if (smi_handle_dr_smp_recv(smp,
1880 				   rdma_cap_ib_switch(port_priv->device),
1881 				   port_num,
1882 				   port_priv->device->phys_port_cnt) ==
1883 				   IB_SMI_DISCARD)
1884 		return IB_SMI_DISCARD;
1885 
1886 	retsmi = smi_check_forward_dr_smp(smp);
1887 	if (retsmi == IB_SMI_LOCAL)
1888 		return IB_SMI_HANDLE;
1889 
1890 	if (retsmi == IB_SMI_SEND) { /* don't forward */
1891 		if (smi_handle_dr_smp_send(smp,
1892 					   rdma_cap_ib_switch(port_priv->device),
1893 					   port_num) == IB_SMI_DISCARD)
1894 			return IB_SMI_DISCARD;
1895 
1896 		if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD)
1897 			return IB_SMI_DISCARD;
1898 	} else if (rdma_cap_ib_switch(port_priv->device)) {
1899 		/* forward case for switches */
1900 		memcpy(response, recv, mad_priv_size(response));
1901 		response->header.recv_wc.wc = &response->header.wc;
1902 		response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad;
1903 		response->header.recv_wc.recv_buf.grh = &response->grh;
1904 
1905 		agent_send_response((const struct ib_mad_hdr *)response->mad,
1906 				    &response->grh, wc,
1907 				    port_priv->device,
1908 				    smi_get_fwd_port(smp),
1909 				    qp_info->qp->qp_num,
1910 				    response->mad_size,
1911 				    false);
1912 
1913 		return IB_SMI_DISCARD;
1914 	}
1915 	return IB_SMI_HANDLE;
1916 }
1917 
1918 static bool generate_unmatched_resp(const struct ib_mad_private *recv,
1919 				    struct ib_mad_private *response,
1920 				    size_t *resp_len, bool opa)
1921 {
1922 	const struct ib_mad_hdr *recv_hdr = (const struct ib_mad_hdr *)recv->mad;
1923 	struct ib_mad_hdr *resp_hdr = (struct ib_mad_hdr *)response->mad;
1924 
1925 	if (recv_hdr->method == IB_MGMT_METHOD_GET ||
1926 	    recv_hdr->method == IB_MGMT_METHOD_SET) {
1927 		memcpy(response, recv, mad_priv_size(response));
1928 		response->header.recv_wc.wc = &response->header.wc;
1929 		response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad;
1930 		response->header.recv_wc.recv_buf.grh = &response->grh;
1931 		resp_hdr->method = IB_MGMT_METHOD_GET_RESP;
1932 		resp_hdr->status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);
1933 		if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
1934 			resp_hdr->status |= IB_SMP_DIRECTION;
1935 
1936 		if (opa && recv_hdr->base_version == OPA_MGMT_BASE_VERSION) {
1937 			if (recv_hdr->mgmt_class ==
1938 			    IB_MGMT_CLASS_SUBN_LID_ROUTED ||
1939 			    recv_hdr->mgmt_class ==
1940 			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
1941 				*resp_len = opa_get_smp_header_size(
1942 							(struct opa_smp *)recv->mad);
1943 			else
1944 				*resp_len = sizeof(struct ib_mad_hdr);
1945 		}
1946 
1947 		return true;
1948 	} else {
1949 		return false;
1950 	}
1951 }
1952 
1953 static enum smi_action
1954 handle_opa_smi(struct ib_mad_port_private *port_priv,
1955 	       struct ib_mad_qp_info *qp_info,
1956 	       struct ib_wc *wc,
1957 	       int port_num,
1958 	       struct ib_mad_private *recv,
1959 	       struct ib_mad_private *response)
1960 {
1961 	enum smi_forward_action retsmi;
1962 	struct opa_smp *smp = (struct opa_smp *)recv->mad;
1963 
1964 	trace_ib_mad_handle_opa_smi(smp);
1965 
1966 	if (opa_smi_handle_dr_smp_recv(smp,
1967 				   rdma_cap_ib_switch(port_priv->device),
1968 				   port_num,
1969 				   port_priv->device->phys_port_cnt) ==
1970 				   IB_SMI_DISCARD)
1971 		return IB_SMI_DISCARD;
1972 
1973 	retsmi = opa_smi_check_forward_dr_smp(smp);
1974 	if (retsmi == IB_SMI_LOCAL)
1975 		return IB_SMI_HANDLE;
1976 
1977 	if (retsmi == IB_SMI_SEND) { /* don't forward */
1978 		if (opa_smi_handle_dr_smp_send(smp,
1979 					   rdma_cap_ib_switch(port_priv->device),
1980 					   port_num) == IB_SMI_DISCARD)
1981 			return IB_SMI_DISCARD;
1982 
1983 		if (opa_smi_check_local_smp(smp, port_priv->device) ==
1984 		    IB_SMI_DISCARD)
1985 			return IB_SMI_DISCARD;
1986 
1987 	} else if (rdma_cap_ib_switch(port_priv->device)) {
1988 		/* forward case for switches */
1989 		memcpy(response, recv, mad_priv_size(response));
1990 		response->header.recv_wc.wc = &response->header.wc;
1991 		response->header.recv_wc.recv_buf.opa_mad =
1992 				(struct opa_mad *)response->mad;
1993 		response->header.recv_wc.recv_buf.grh = &response->grh;
1994 
1995 		agent_send_response((const struct ib_mad_hdr *)response->mad,
1996 				    &response->grh, wc,
1997 				    port_priv->device,
1998 				    opa_smi_get_fwd_port(smp),
1999 				    qp_info->qp->qp_num,
2000 				    recv->header.wc.byte_len,
2001 				    true);
2002 
2003 		return IB_SMI_DISCARD;
2004 	}
2005 
2006 	return IB_SMI_HANDLE;
2007 }
2008 
2009 static enum smi_action
2010 handle_smi(struct ib_mad_port_private *port_priv,
2011 	   struct ib_mad_qp_info *qp_info,
2012 	   struct ib_wc *wc,
2013 	   int port_num,
2014 	   struct ib_mad_private *recv,
2015 	   struct ib_mad_private *response,
2016 	   bool opa)
2017 {
2018 	struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad;
2019 
2020 	if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION &&
2021 	    mad_hdr->class_version == OPA_SM_CLASS_VERSION)
2022 		return handle_opa_smi(port_priv, qp_info, wc, port_num, recv,
2023 				      response);
2024 
2025 	return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
2026 }
2027 
2028 static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2029 {
2030 	struct ib_mad_port_private *port_priv = cq->cq_context;
2031 	struct ib_mad_list_head *mad_list =
2032 		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
2033 	struct ib_mad_qp_info *qp_info;
2034 	struct ib_mad_private_header *mad_priv_hdr;
2035 	struct ib_mad_private *recv, *response = NULL;
2036 	struct ib_mad_agent_private *mad_agent;
2037 	int port_num;
2038 	int ret = IB_MAD_RESULT_SUCCESS;
2039 	size_t mad_size;
2040 	u16 resp_mad_pkey_index = 0;
2041 	bool opa;
2042 
2043 	if (list_empty_careful(&port_priv->port_list))
2044 		return;
2045 
2046 	if (wc->status != IB_WC_SUCCESS) {
2047 		/*
2048 		 * Receive errors indicate that the QP has entered the error
2049 		 * state - error handling/shutdown code will cleanup
2050 		 */
2051 		return;
2052 	}
2053 
2054 	qp_info = mad_list->mad_queue->qp_info;
2055 	dequeue_mad(mad_list);
2056 
2057 	opa = rdma_cap_opa_mad(qp_info->port_priv->device,
2058 			       qp_info->port_priv->port_num);
2059 
2060 	mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
2061 				    mad_list);
2062 	recv = container_of(mad_priv_hdr, struct ib_mad_private, header);
2063 	ib_dma_unmap_single(port_priv->device,
2064 			    recv->header.mapping,
2065 			    mad_priv_dma_size(recv),
2066 			    DMA_FROM_DEVICE);
2067 
2068 	/* Setup MAD receive work completion from "normal" work completion */
2069 	recv->header.wc = *wc;
2070 	recv->header.recv_wc.wc = &recv->header.wc;
2071 
2072 	if (opa && ((struct ib_mad_hdr *)(recv->mad))->base_version == OPA_MGMT_BASE_VERSION) {
2073 		recv->header.recv_wc.mad_len = wc->byte_len - sizeof(struct ib_grh);
2074 		recv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad);
2075 	} else {
2076 		recv->header.recv_wc.mad_len = sizeof(struct ib_mad);
2077 		recv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad);
2078 	}
2079 
2080 	recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad;
2081 	recv->header.recv_wc.recv_buf.grh = &recv->grh;
2082 
2083 	/* Validate MAD */
2084 	if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa))
2085 		goto out;
2086 
2087 	trace_ib_mad_recv_done_handler(qp_info, wc,
2088 				       (struct ib_mad_hdr *)recv->mad);
2089 
2090 	mad_size = recv->mad_size;
2091 	response = alloc_mad_private(mad_size, GFP_KERNEL);
2092 	if (!response)
2093 		goto out;
2094 
2095 	if (rdma_cap_ib_switch(port_priv->device))
2096 		port_num = wc->port_num;
2097 	else
2098 		port_num = port_priv->port_num;
2099 
2100 	if (((struct ib_mad_hdr *)recv->mad)->mgmt_class ==
2101 	    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
2102 		if (handle_smi(port_priv, qp_info, wc, port_num, recv,
2103 			       response, opa)
2104 		    == IB_SMI_DISCARD)
2105 			goto out;
2106 	}
2107 
2108 	/* Give driver "right of first refusal" on incoming MAD */
2109 	if (port_priv->device->ops.process_mad) {
2110 		ret = port_priv->device->ops.process_mad(
2111 			port_priv->device, 0, port_priv->port_num, wc,
2112 			&recv->grh, (const struct ib_mad *)recv->mad,
2113 			(struct ib_mad *)response->mad, &mad_size,
2114 			&resp_mad_pkey_index);
2115 
2116 		if (opa)
2117 			wc->pkey_index = resp_mad_pkey_index;
2118 
2119 		if (ret & IB_MAD_RESULT_SUCCESS) {
2120 			if (ret & IB_MAD_RESULT_CONSUMED)
2121 				goto out;
2122 			if (ret & IB_MAD_RESULT_REPLY) {
2123 				agent_send_response((const struct ib_mad_hdr *)response->mad,
2124 						    &recv->grh, wc,
2125 						    port_priv->device,
2126 						    port_num,
2127 						    qp_info->qp->qp_num,
2128 						    mad_size, opa);
2129 				goto out;
2130 			}
2131 		}
2132 	}
2133 
2134 	mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad);
2135 	if (mad_agent) {
2136 		trace_ib_mad_recv_done_agent(mad_agent);
2137 		ib_mad_complete_recv(mad_agent, &recv->header.recv_wc);
2138 		/*
2139 		 * recv is freed up in error cases in ib_mad_complete_recv
2140 		 * or via recv_handler in ib_mad_complete_recv()
2141 		 */
2142 		recv = NULL;
2143 	} else if ((ret & IB_MAD_RESULT_SUCCESS) &&
2144 		   generate_unmatched_resp(recv, response, &mad_size, opa)) {
2145 		agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc,
2146 				    port_priv->device, port_num,
2147 				    qp_info->qp->qp_num, mad_size, opa);
2148 	}
2149 
2150 out:
2151 	/* Post another receive request for this QP */
2152 	if (response) {
2153 		ib_mad_post_receive_mads(qp_info, response);
2154 		kfree(recv);
2155 	} else
2156 		ib_mad_post_receive_mads(qp_info, recv);
2157 }
2158 
2159 static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
2160 {
2161 	struct ib_mad_send_wr_private *mad_send_wr;
2162 	unsigned long delay;
2163 
2164 	if (list_empty(&mad_agent_priv->wait_list)) {
2165 		cancel_delayed_work(&mad_agent_priv->timed_work);
2166 	} else {
2167 		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2168 					 struct ib_mad_send_wr_private,
2169 					 agent_list);
2170 
2171 		if (time_after(mad_agent_priv->timeout,
2172 			       mad_send_wr->timeout)) {
2173 			mad_agent_priv->timeout = mad_send_wr->timeout;
2174 			delay = mad_send_wr->timeout - jiffies;
2175 			if ((long)delay <= 0)
2176 				delay = 1;
2177 			mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
2178 					 &mad_agent_priv->timed_work, delay);
2179 		}
2180 	}
2181 }
2182 
2183 static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
2184 {
2185 	struct ib_mad_agent_private *mad_agent_priv;
2186 	struct ib_mad_send_wr_private *temp_mad_send_wr;
2187 	struct list_head *list_item;
2188 	unsigned long delay;
2189 
2190 	mad_agent_priv = mad_send_wr->mad_agent_priv;
2191 	list_del(&mad_send_wr->agent_list);
2192 
2193 	delay = mad_send_wr->timeout;
2194 	mad_send_wr->timeout += jiffies;
2195 
2196 	if (delay) {
2197 		list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
2198 			temp_mad_send_wr = list_entry(list_item,
2199 						struct ib_mad_send_wr_private,
2200 						agent_list);
2201 			if (time_after(mad_send_wr->timeout,
2202 				       temp_mad_send_wr->timeout))
2203 				break;
2204 		}
2205 	}
2206 	else
2207 		list_item = &mad_agent_priv->wait_list;
2208 	list_add(&mad_send_wr->agent_list, list_item);
2209 
2210 	/* Reschedule a work item if we have a shorter timeout */
2211 	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
2212 		mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
2213 				 &mad_agent_priv->timed_work, delay);
2214 }
2215 
2216 void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
2217 			  unsigned long timeout_ms)
2218 {
2219 	mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2220 	wait_for_response(mad_send_wr);
2221 }
2222 
2223 /*
2224  * Process a send work completion
2225  */
2226 void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
2227 			     struct ib_mad_send_wc *mad_send_wc)
2228 {
2229 	struct ib_mad_agent_private	*mad_agent_priv;
2230 	unsigned long			flags;
2231 	int				ret;
2232 
2233 	mad_agent_priv = mad_send_wr->mad_agent_priv;
2234 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2235 	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
2236 		ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
2237 		if (ret == IB_RMPP_RESULT_CONSUMED)
2238 			goto done;
2239 	} else
2240 		ret = IB_RMPP_RESULT_UNHANDLED;
2241 
2242 	if (mad_send_wc->status != IB_WC_SUCCESS &&
2243 	    mad_send_wr->status == IB_WC_SUCCESS) {
2244 		mad_send_wr->status = mad_send_wc->status;
2245 		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2246 	}
2247 
2248 	if (--mad_send_wr->refcount > 0) {
2249 		if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
2250 		    mad_send_wr->status == IB_WC_SUCCESS) {
2251 			wait_for_response(mad_send_wr);
2252 		}
2253 		goto done;
2254 	}
2255 
2256 	/* Remove send from MAD agent and notify client of completion */
2257 	list_del(&mad_send_wr->agent_list);
2258 	adjust_timeout(mad_agent_priv);
2259 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2260 
2261 	if (mad_send_wr->status != IB_WC_SUCCESS )
2262 		mad_send_wc->status = mad_send_wr->status;
2263 	if (ret == IB_RMPP_RESULT_INTERNAL)
2264 		ib_rmpp_send_handler(mad_send_wc);
2265 	else
2266 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2267 						   mad_send_wc);
2268 
2269 	/* Release reference on agent taken when sending */
2270 	deref_mad_agent(mad_agent_priv);
2271 	return;
2272 done:
2273 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2274 }
2275 
2276 static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
2277 {
2278 	struct ib_mad_port_private *port_priv = cq->cq_context;
2279 	struct ib_mad_list_head *mad_list =
2280 		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
2281 	struct ib_mad_send_wr_private	*mad_send_wr, *queued_send_wr;
2282 	struct ib_mad_qp_info		*qp_info;
2283 	struct ib_mad_queue		*send_queue;
2284 	struct ib_mad_send_wc		mad_send_wc;
2285 	unsigned long flags;
2286 	int ret;
2287 
2288 	if (list_empty_careful(&port_priv->port_list))
2289 		return;
2290 
2291 	if (wc->status != IB_WC_SUCCESS) {
2292 		if (!ib_mad_send_error(port_priv, wc))
2293 			return;
2294 	}
2295 
2296 	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2297 				   mad_list);
2298 	send_queue = mad_list->mad_queue;
2299 	qp_info = send_queue->qp_info;
2300 
2301 	trace_ib_mad_send_done_agent(mad_send_wr->mad_agent_priv);
2302 	trace_ib_mad_send_done_handler(mad_send_wr, wc);
2303 
2304 retry:
2305 	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
2306 			    mad_send_wr->header_mapping,
2307 			    mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
2308 	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
2309 			    mad_send_wr->payload_mapping,
2310 			    mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
2311 	queued_send_wr = NULL;
2312 	spin_lock_irqsave(&send_queue->lock, flags);
2313 	list_del(&mad_list->list);
2314 
2315 	/* Move queued send to the send queue */
2316 	if (send_queue->count-- > send_queue->max_active) {
2317 		mad_list = container_of(qp_info->overflow_list.next,
2318 					struct ib_mad_list_head, list);
2319 		queued_send_wr = container_of(mad_list,
2320 					struct ib_mad_send_wr_private,
2321 					mad_list);
2322 		list_move_tail(&mad_list->list, &send_queue->list);
2323 	}
2324 	spin_unlock_irqrestore(&send_queue->lock, flags);
2325 
2326 	mad_send_wc.send_buf = &mad_send_wr->send_buf;
2327 	mad_send_wc.status = wc->status;
2328 	mad_send_wc.vendor_err = wc->vendor_err;
2329 	ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
2330 
2331 	if (queued_send_wr) {
2332 		trace_ib_mad_send_done_resend(queued_send_wr, qp_info);
2333 		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
2334 				   NULL);
2335 		if (ret) {
2336 			dev_err(&port_priv->device->dev,
2337 				"ib_post_send failed: %d\n", ret);
2338 			mad_send_wr = queued_send_wr;
2339 			wc->status = IB_WC_LOC_QP_OP_ERR;
2340 			goto retry;
2341 		}
2342 	}
2343 }
2344 
2345 static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
2346 {
2347 	struct ib_mad_send_wr_private *mad_send_wr;
2348 	struct ib_mad_list_head *mad_list;
2349 	unsigned long flags;
2350 
2351 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
2352 	list_for_each_entry(mad_list, &qp_info->send_queue.list, list) {
2353 		mad_send_wr = container_of(mad_list,
2354 					   struct ib_mad_send_wr_private,
2355 					   mad_list);
2356 		mad_send_wr->retry = 1;
2357 	}
2358 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
2359 }
2360 
2361 static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
2362 		struct ib_wc *wc)
2363 {
2364 	struct ib_mad_list_head *mad_list =
2365 		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
2366 	struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info;
2367 	struct ib_mad_send_wr_private *mad_send_wr;
2368 	int ret;
2369 
2370 	/*
2371 	 * Send errors will transition the QP to SQE - move
2372 	 * QP to RTS and repost flushed work requests
2373 	 */
2374 	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2375 				   mad_list);
2376 	if (wc->status == IB_WC_WR_FLUSH_ERR) {
2377 		if (mad_send_wr->retry) {
2378 			/* Repost send */
2379 			mad_send_wr->retry = 0;
2380 			trace_ib_mad_error_handler(mad_send_wr, qp_info);
2381 			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
2382 					   NULL);
2383 			if (!ret)
2384 				return false;
2385 		}
2386 	} else {
2387 		struct ib_qp_attr *attr;
2388 
2389 		/* Transition QP to RTS and fail offending send */
2390 		attr = kmalloc(sizeof *attr, GFP_KERNEL);
2391 		if (attr) {
2392 			attr->qp_state = IB_QPS_RTS;
2393 			attr->cur_qp_state = IB_QPS_SQE;
2394 			ret = ib_modify_qp(qp_info->qp, attr,
2395 					   IB_QP_STATE | IB_QP_CUR_STATE);
2396 			kfree(attr);
2397 			if (ret)
2398 				dev_err(&port_priv->device->dev,
2399 					"%s - ib_modify_qp to RTS: %d\n",
2400 					__func__, ret);
2401 			else
2402 				mark_sends_for_retry(qp_info);
2403 		}
2404 	}
2405 
2406 	return true;
2407 }
2408 
2409 static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
2410 {
2411 	unsigned long flags;
2412 	struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
2413 	struct ib_mad_send_wc mad_send_wc;
2414 	struct list_head cancel_list;
2415 
2416 	INIT_LIST_HEAD(&cancel_list);
2417 
2418 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2419 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2420 				 &mad_agent_priv->send_list, agent_list) {
2421 		if (mad_send_wr->status == IB_WC_SUCCESS) {
2422 			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2423 			mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2424 		}
2425 	}
2426 
2427 	/* Empty wait list to prevent receives from finding a request */
2428 	list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
2429 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2430 
2431 	/* Report all cancelled requests */
2432 	mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
2433 	mad_send_wc.vendor_err = 0;
2434 
2435 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2436 				 &cancel_list, agent_list) {
2437 		mad_send_wc.send_buf = &mad_send_wr->send_buf;
2438 		list_del(&mad_send_wr->agent_list);
2439 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2440 						   &mad_send_wc);
2441 		atomic_dec(&mad_agent_priv->refcount);
2442 	}
2443 }
2444 
2445 static struct ib_mad_send_wr_private*
2446 find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
2447 	     struct ib_mad_send_buf *send_buf)
2448 {
2449 	struct ib_mad_send_wr_private *mad_send_wr;
2450 
2451 	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
2452 			    agent_list) {
2453 		if (&mad_send_wr->send_buf == send_buf)
2454 			return mad_send_wr;
2455 	}
2456 
2457 	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
2458 			    agent_list) {
2459 		if (is_rmpp_data_mad(mad_agent_priv,
2460 				     mad_send_wr->send_buf.mad) &&
2461 		    &mad_send_wr->send_buf == send_buf)
2462 			return mad_send_wr;
2463 	}
2464 	return NULL;
2465 }
2466 
2467 int ib_modify_mad(struct ib_mad_agent *mad_agent,
2468 		  struct ib_mad_send_buf *send_buf, u32 timeout_ms)
2469 {
2470 	struct ib_mad_agent_private *mad_agent_priv;
2471 	struct ib_mad_send_wr_private *mad_send_wr;
2472 	unsigned long flags;
2473 	int active;
2474 
2475 	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
2476 				      agent);
2477 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2478 	mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
2479 	if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
2480 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2481 		return -EINVAL;
2482 	}
2483 
2484 	active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1);
2485 	if (!timeout_ms) {
2486 		mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2487 		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2488 	}
2489 
2490 	mad_send_wr->send_buf.timeout_ms = timeout_ms;
2491 	if (active)
2492 		mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2493 	else
2494 		ib_reset_mad_timeout(mad_send_wr, timeout_ms);
2495 
2496 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2497 	return 0;
2498 }
2499 EXPORT_SYMBOL(ib_modify_mad);
2500 
2501 void ib_cancel_mad(struct ib_mad_agent *mad_agent,
2502 		   struct ib_mad_send_buf *send_buf)
2503 {
2504 	ib_modify_mad(mad_agent, send_buf, 0);
2505 }
2506 EXPORT_SYMBOL(ib_cancel_mad);
2507 
2508 static void local_completions(struct work_struct *work)
2509 {
2510 	struct ib_mad_agent_private *mad_agent_priv;
2511 	struct ib_mad_local_private *local;
2512 	struct ib_mad_agent_private *recv_mad_agent;
2513 	unsigned long flags;
2514 	int free_mad;
2515 	struct ib_wc wc;
2516 	struct ib_mad_send_wc mad_send_wc;
2517 	bool opa;
2518 
2519 	mad_agent_priv =
2520 		container_of(work, struct ib_mad_agent_private, local_work);
2521 
2522 	opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device,
2523 			       mad_agent_priv->qp_info->port_priv->port_num);
2524 
2525 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2526 	while (!list_empty(&mad_agent_priv->local_list)) {
2527 		local = list_entry(mad_agent_priv->local_list.next,
2528 				   struct ib_mad_local_private,
2529 				   completion_list);
2530 		list_del(&local->completion_list);
2531 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2532 		free_mad = 0;
2533 		if (local->mad_priv) {
2534 			u8 base_version;
2535 			recv_mad_agent = local->recv_mad_agent;
2536 			if (!recv_mad_agent) {
2537 				dev_err(&mad_agent_priv->agent.device->dev,
2538 					"No receive MAD agent for local completion\n");
2539 				free_mad = 1;
2540 				goto local_send_completion;
2541 			}
2542 
2543 			/*
2544 			 * Defined behavior is to complete response
2545 			 * before request
2546 			 */
2547 			build_smp_wc(recv_mad_agent->agent.qp,
2548 				     local->mad_send_wr->send_wr.wr.wr_cqe,
2549 				     be16_to_cpu(IB_LID_PERMISSIVE),
2550 				     local->mad_send_wr->send_wr.pkey_index,
2551 				     recv_mad_agent->agent.port_num, &wc);
2552 
2553 			local->mad_priv->header.recv_wc.wc = &wc;
2554 
2555 			base_version = ((struct ib_mad_hdr *)(local->mad_priv->mad))->base_version;
2556 			if (opa && base_version == OPA_MGMT_BASE_VERSION) {
2557 				local->mad_priv->header.recv_wc.mad_len = local->return_wc_byte_len;
2558 				local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad);
2559 			} else {
2560 				local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad);
2561 				local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad);
2562 			}
2563 
2564 			INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list);
2565 			list_add(&local->mad_priv->header.recv_wc.recv_buf.list,
2566 				 &local->mad_priv->header.recv_wc.rmpp_list);
2567 			local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
2568 			local->mad_priv->header.recv_wc.recv_buf.mad =
2569 						(struct ib_mad *)local->mad_priv->mad;
2570 			recv_mad_agent->agent.recv_handler(
2571 						&recv_mad_agent->agent,
2572 						&local->mad_send_wr->send_buf,
2573 						&local->mad_priv->header.recv_wc);
2574 			spin_lock_irqsave(&recv_mad_agent->lock, flags);
2575 			atomic_dec(&recv_mad_agent->refcount);
2576 			spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
2577 		}
2578 
2579 local_send_completion:
2580 		/* Complete send */
2581 		mad_send_wc.status = IB_WC_SUCCESS;
2582 		mad_send_wc.vendor_err = 0;
2583 		mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
2584 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2585 						   &mad_send_wc);
2586 
2587 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2588 		atomic_dec(&mad_agent_priv->refcount);
2589 		if (free_mad)
2590 			kfree(local->mad_priv);
2591 		kfree(local);
2592 	}
2593 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2594 }
2595 
2596 static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
2597 {
2598 	int ret;
2599 
2600 	if (!mad_send_wr->retries_left)
2601 		return -ETIMEDOUT;
2602 
2603 	mad_send_wr->retries_left--;
2604 	mad_send_wr->send_buf.retries++;
2605 
2606 	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
2607 
2608 	if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
2609 		ret = ib_retry_rmpp(mad_send_wr);
2610 		switch (ret) {
2611 		case IB_RMPP_RESULT_UNHANDLED:
2612 			ret = ib_send_mad(mad_send_wr);
2613 			break;
2614 		case IB_RMPP_RESULT_CONSUMED:
2615 			ret = 0;
2616 			break;
2617 		default:
2618 			ret = -ECOMM;
2619 			break;
2620 		}
2621 	} else
2622 		ret = ib_send_mad(mad_send_wr);
2623 
2624 	if (!ret) {
2625 		mad_send_wr->refcount++;
2626 		list_add_tail(&mad_send_wr->agent_list,
2627 			      &mad_send_wr->mad_agent_priv->send_list);
2628 	}
2629 	return ret;
2630 }
2631 
2632 static void timeout_sends(struct work_struct *work)
2633 {
2634 	struct ib_mad_agent_private *mad_agent_priv;
2635 	struct ib_mad_send_wr_private *mad_send_wr;
2636 	struct ib_mad_send_wc mad_send_wc;
2637 	unsigned long flags, delay;
2638 
2639 	mad_agent_priv = container_of(work, struct ib_mad_agent_private,
2640 				      timed_work.work);
2641 	mad_send_wc.vendor_err = 0;
2642 
2643 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2644 	while (!list_empty(&mad_agent_priv->wait_list)) {
2645 		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2646 					 struct ib_mad_send_wr_private,
2647 					 agent_list);
2648 
2649 		if (time_after(mad_send_wr->timeout, jiffies)) {
2650 			delay = mad_send_wr->timeout - jiffies;
2651 			if ((long)delay <= 0)
2652 				delay = 1;
2653 			queue_delayed_work(mad_agent_priv->qp_info->
2654 					   port_priv->wq,
2655 					   &mad_agent_priv->timed_work, delay);
2656 			break;
2657 		}
2658 
2659 		list_del(&mad_send_wr->agent_list);
2660 		if (mad_send_wr->status == IB_WC_SUCCESS &&
2661 		    !retry_send(mad_send_wr))
2662 			continue;
2663 
2664 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2665 
2666 		if (mad_send_wr->status == IB_WC_SUCCESS)
2667 			mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
2668 		else
2669 			mad_send_wc.status = mad_send_wr->status;
2670 		mad_send_wc.send_buf = &mad_send_wr->send_buf;
2671 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2672 						   &mad_send_wc);
2673 
2674 		atomic_dec(&mad_agent_priv->refcount);
2675 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2676 	}
2677 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2678 }
2679 
2680 /*
2681  * Allocate receive MADs and post receive WRs for them
2682  */
2683 static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
2684 				    struct ib_mad_private *mad)
2685 {
2686 	unsigned long flags;
2687 	int post, ret;
2688 	struct ib_mad_private *mad_priv;
2689 	struct ib_sge sg_list;
2690 	struct ib_recv_wr recv_wr;
2691 	struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
2692 
2693 	/* Initialize common scatter list fields */
2694 	sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey;
2695 
2696 	/* Initialize common receive WR fields */
2697 	recv_wr.next = NULL;
2698 	recv_wr.sg_list = &sg_list;
2699 	recv_wr.num_sge = 1;
2700 
2701 	do {
2702 		/* Allocate and map receive buffer */
2703 		if (mad) {
2704 			mad_priv = mad;
2705 			mad = NULL;
2706 		} else {
2707 			mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv),
2708 						     GFP_ATOMIC);
2709 			if (!mad_priv) {
2710 				ret = -ENOMEM;
2711 				break;
2712 			}
2713 		}
2714 		sg_list.length = mad_priv_dma_size(mad_priv);
2715 		sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
2716 						 &mad_priv->grh,
2717 						 mad_priv_dma_size(mad_priv),
2718 						 DMA_FROM_DEVICE);
2719 		if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
2720 						  sg_list.addr))) {
2721 			ret = -ENOMEM;
2722 			break;
2723 		}
2724 		mad_priv->header.mapping = sg_list.addr;
2725 		mad_priv->header.mad_list.mad_queue = recv_queue;
2726 		mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
2727 		recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
2728 
2729 		/* Post receive WR */
2730 		spin_lock_irqsave(&recv_queue->lock, flags);
2731 		post = (++recv_queue->count < recv_queue->max_active);
2732 		list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
2733 		spin_unlock_irqrestore(&recv_queue->lock, flags);
2734 		ret = ib_post_recv(qp_info->qp, &recv_wr, NULL);
2735 		if (ret) {
2736 			spin_lock_irqsave(&recv_queue->lock, flags);
2737 			list_del(&mad_priv->header.mad_list.list);
2738 			recv_queue->count--;
2739 			spin_unlock_irqrestore(&recv_queue->lock, flags);
2740 			ib_dma_unmap_single(qp_info->port_priv->device,
2741 					    mad_priv->header.mapping,
2742 					    mad_priv_dma_size(mad_priv),
2743 					    DMA_FROM_DEVICE);
2744 			kfree(mad_priv);
2745 			dev_err(&qp_info->port_priv->device->dev,
2746 				"ib_post_recv failed: %d\n", ret);
2747 			break;
2748 		}
2749 	} while (post);
2750 
2751 	return ret;
2752 }
2753 
2754 /*
2755  * Return all the posted receive MADs
2756  */
2757 static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info)
2758 {
2759 	struct ib_mad_private_header *mad_priv_hdr;
2760 	struct ib_mad_private *recv;
2761 	struct ib_mad_list_head *mad_list;
2762 
2763 	if (!qp_info->qp)
2764 		return;
2765 
2766 	while (!list_empty(&qp_info->recv_queue.list)) {
2767 
2768 		mad_list = list_entry(qp_info->recv_queue.list.next,
2769 				      struct ib_mad_list_head, list);
2770 		mad_priv_hdr = container_of(mad_list,
2771 					    struct ib_mad_private_header,
2772 					    mad_list);
2773 		recv = container_of(mad_priv_hdr, struct ib_mad_private,
2774 				    header);
2775 
2776 		/* Remove from posted receive MAD list */
2777 		list_del(&mad_list->list);
2778 
2779 		ib_dma_unmap_single(qp_info->port_priv->device,
2780 				    recv->header.mapping,
2781 				    mad_priv_dma_size(recv),
2782 				    DMA_FROM_DEVICE);
2783 		kfree(recv);
2784 	}
2785 
2786 	qp_info->recv_queue.count = 0;
2787 }
2788 
2789 /*
2790  * Start the port
2791  */
2792 static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
2793 {
2794 	int ret, i;
2795 	struct ib_qp_attr *attr;
2796 	struct ib_qp *qp;
2797 	u16 pkey_index;
2798 
2799 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
2800 	if (!attr)
2801 		return -ENOMEM;
2802 
2803 	ret = ib_find_pkey(port_priv->device, port_priv->port_num,
2804 			   IB_DEFAULT_PKEY_FULL, &pkey_index);
2805 	if (ret)
2806 		pkey_index = 0;
2807 
2808 	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2809 		qp = port_priv->qp_info[i].qp;
2810 		if (!qp)
2811 			continue;
2812 
2813 		/*
2814 		 * PKey index for QP1 is irrelevant but
2815 		 * one is needed for the Reset to Init transition
2816 		 */
2817 		attr->qp_state = IB_QPS_INIT;
2818 		attr->pkey_index = pkey_index;
2819 		attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
2820 		ret = ib_modify_qp(qp, attr, IB_QP_STATE |
2821 					     IB_QP_PKEY_INDEX | IB_QP_QKEY);
2822 		if (ret) {
2823 			dev_err(&port_priv->device->dev,
2824 				"Couldn't change QP%d state to INIT: %d\n",
2825 				i, ret);
2826 			goto out;
2827 		}
2828 
2829 		attr->qp_state = IB_QPS_RTR;
2830 		ret = ib_modify_qp(qp, attr, IB_QP_STATE);
2831 		if (ret) {
2832 			dev_err(&port_priv->device->dev,
2833 				"Couldn't change QP%d state to RTR: %d\n",
2834 				i, ret);
2835 			goto out;
2836 		}
2837 
2838 		attr->qp_state = IB_QPS_RTS;
2839 		attr->sq_psn = IB_MAD_SEND_Q_PSN;
2840 		ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
2841 		if (ret) {
2842 			dev_err(&port_priv->device->dev,
2843 				"Couldn't change QP%d state to RTS: %d\n",
2844 				i, ret);
2845 			goto out;
2846 		}
2847 	}
2848 
2849 	ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
2850 	if (ret) {
2851 		dev_err(&port_priv->device->dev,
2852 			"Failed to request completion notification: %d\n",
2853 			ret);
2854 		goto out;
2855 	}
2856 
2857 	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2858 		if (!port_priv->qp_info[i].qp)
2859 			continue;
2860 
2861 		ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
2862 		if (ret) {
2863 			dev_err(&port_priv->device->dev,
2864 				"Couldn't post receive WRs\n");
2865 			goto out;
2866 		}
2867 	}
2868 out:
2869 	kfree(attr);
2870 	return ret;
2871 }
2872 
2873 static void qp_event_handler(struct ib_event *event, void *qp_context)
2874 {
2875 	struct ib_mad_qp_info	*qp_info = qp_context;
2876 
2877 	/* It's worse than that! He's dead, Jim! */
2878 	dev_err(&qp_info->port_priv->device->dev,
2879 		"Fatal error (%d) on MAD QP (%d)\n",
2880 		event->event, qp_info->qp->qp_num);
2881 }
2882 
2883 static void init_mad_queue(struct ib_mad_qp_info *qp_info,
2884 			   struct ib_mad_queue *mad_queue)
2885 {
2886 	mad_queue->qp_info = qp_info;
2887 	mad_queue->count = 0;
2888 	spin_lock_init(&mad_queue->lock);
2889 	INIT_LIST_HEAD(&mad_queue->list);
2890 }
2891 
2892 static void init_mad_qp(struct ib_mad_port_private *port_priv,
2893 			struct ib_mad_qp_info *qp_info)
2894 {
2895 	qp_info->port_priv = port_priv;
2896 	init_mad_queue(qp_info, &qp_info->send_queue);
2897 	init_mad_queue(qp_info, &qp_info->recv_queue);
2898 	INIT_LIST_HEAD(&qp_info->overflow_list);
2899 }
2900 
2901 static int create_mad_qp(struct ib_mad_qp_info *qp_info,
2902 			 enum ib_qp_type qp_type)
2903 {
2904 	struct ib_qp_init_attr	qp_init_attr;
2905 	int ret;
2906 
2907 	memset(&qp_init_attr, 0, sizeof qp_init_attr);
2908 	qp_init_attr.send_cq = qp_info->port_priv->cq;
2909 	qp_init_attr.recv_cq = qp_info->port_priv->cq;
2910 	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
2911 	qp_init_attr.cap.max_send_wr = mad_sendq_size;
2912 	qp_init_attr.cap.max_recv_wr = mad_recvq_size;
2913 	qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
2914 	qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
2915 	qp_init_attr.qp_type = qp_type;
2916 	qp_init_attr.port_num = qp_info->port_priv->port_num;
2917 	qp_init_attr.qp_context = qp_info;
2918 	qp_init_attr.event_handler = qp_event_handler;
2919 	qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
2920 	if (IS_ERR(qp_info->qp)) {
2921 		dev_err(&qp_info->port_priv->device->dev,
2922 			"Couldn't create ib_mad QP%d\n",
2923 			get_spl_qp_index(qp_type));
2924 		ret = PTR_ERR(qp_info->qp);
2925 		goto error;
2926 	}
2927 	/* Use minimum queue sizes unless the CQ is resized */
2928 	qp_info->send_queue.max_active = mad_sendq_size;
2929 	qp_info->recv_queue.max_active = mad_recvq_size;
2930 	return 0;
2931 
2932 error:
2933 	return ret;
2934 }
2935 
2936 static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
2937 {
2938 	if (!qp_info->qp)
2939 		return;
2940 
2941 	ib_destroy_qp(qp_info->qp);
2942 }
2943 
2944 /*
2945  * Open the port
2946  * Create the QP, PD, MR, and CQ if needed
2947  */
2948 static int ib_mad_port_open(struct ib_device *device,
2949 			    int port_num)
2950 {
2951 	int ret, cq_size;
2952 	struct ib_mad_port_private *port_priv;
2953 	unsigned long flags;
2954 	char name[sizeof "ib_mad123"];
2955 	int has_smi;
2956 
2957 	if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
2958 		return -EFAULT;
2959 
2960 	if (WARN_ON(rdma_cap_opa_mad(device, port_num) &&
2961 		    rdma_max_mad_size(device, port_num) < OPA_MGMT_MAD_SIZE))
2962 		return -EFAULT;
2963 
2964 	/* Create new device info */
2965 	port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
2966 	if (!port_priv)
2967 		return -ENOMEM;
2968 
2969 	port_priv->device = device;
2970 	port_priv->port_num = port_num;
2971 	spin_lock_init(&port_priv->reg_lock);
2972 	init_mad_qp(port_priv, &port_priv->qp_info[0]);
2973 	init_mad_qp(port_priv, &port_priv->qp_info[1]);
2974 
2975 	cq_size = mad_sendq_size + mad_recvq_size;
2976 	has_smi = rdma_cap_ib_smi(device, port_num);
2977 	if (has_smi)
2978 		cq_size *= 2;
2979 
2980 	port_priv->pd = ib_alloc_pd(device, 0);
2981 	if (IS_ERR(port_priv->pd)) {
2982 		dev_err(&device->dev, "Couldn't create ib_mad PD\n");
2983 		ret = PTR_ERR(port_priv->pd);
2984 		goto error3;
2985 	}
2986 
2987 	port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
2988 			IB_POLL_UNBOUND_WORKQUEUE);
2989 	if (IS_ERR(port_priv->cq)) {
2990 		dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
2991 		ret = PTR_ERR(port_priv->cq);
2992 		goto error4;
2993 	}
2994 
2995 	if (has_smi) {
2996 		ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI);
2997 		if (ret)
2998 			goto error6;
2999 	}
3000 	ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
3001 	if (ret)
3002 		goto error7;
3003 
3004 	snprintf(name, sizeof name, "ib_mad%d", port_num);
3005 	port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
3006 	if (!port_priv->wq) {
3007 		ret = -ENOMEM;
3008 		goto error8;
3009 	}
3010 
3011 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
3012 	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
3013 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3014 
3015 	ret = ib_mad_port_start(port_priv);
3016 	if (ret) {
3017 		dev_err(&device->dev, "Couldn't start port\n");
3018 		goto error9;
3019 	}
3020 
3021 	return 0;
3022 
3023 error9:
3024 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
3025 	list_del_init(&port_priv->port_list);
3026 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3027 
3028 	destroy_workqueue(port_priv->wq);
3029 error8:
3030 	destroy_mad_qp(&port_priv->qp_info[1]);
3031 error7:
3032 	destroy_mad_qp(&port_priv->qp_info[0]);
3033 error6:
3034 	ib_free_cq(port_priv->cq);
3035 	cleanup_recv_queue(&port_priv->qp_info[1]);
3036 	cleanup_recv_queue(&port_priv->qp_info[0]);
3037 error4:
3038 	ib_dealloc_pd(port_priv->pd);
3039 error3:
3040 	kfree(port_priv);
3041 
3042 	return ret;
3043 }
3044 
3045 /*
3046  * Close the port
3047  * If there are no classes using the port, free the port
3048  * resources (CQ, MR, PD, QP) and remove the port's info structure
3049  */
3050 static int ib_mad_port_close(struct ib_device *device, int port_num)
3051 {
3052 	struct ib_mad_port_private *port_priv;
3053 	unsigned long flags;
3054 
3055 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
3056 	port_priv = __ib_get_mad_port(device, port_num);
3057 	if (port_priv == NULL) {
3058 		spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3059 		dev_err(&device->dev, "Port %d not found\n", port_num);
3060 		return -ENODEV;
3061 	}
3062 	list_del_init(&port_priv->port_list);
3063 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3064 
3065 	destroy_workqueue(port_priv->wq);
3066 	destroy_mad_qp(&port_priv->qp_info[1]);
3067 	destroy_mad_qp(&port_priv->qp_info[0]);
3068 	ib_free_cq(port_priv->cq);
3069 	ib_dealloc_pd(port_priv->pd);
3070 	cleanup_recv_queue(&port_priv->qp_info[1]);
3071 	cleanup_recv_queue(&port_priv->qp_info[0]);
3072 	/* XXX: Handle deallocation of MAD registration tables */
3073 
3074 	kfree(port_priv);
3075 
3076 	return 0;
3077 }
3078 
3079 static int ib_mad_init_device(struct ib_device *device)
3080 {
3081 	int start, i;
3082 	unsigned int count = 0;
3083 	int ret;
3084 
3085 	start = rdma_start_port(device);
3086 
3087 	for (i = start; i <= rdma_end_port(device); i++) {
3088 		if (!rdma_cap_ib_mad(device, i))
3089 			continue;
3090 
3091 		ret = ib_mad_port_open(device, i);
3092 		if (ret) {
3093 			dev_err(&device->dev, "Couldn't open port %d\n", i);
3094 			goto error;
3095 		}
3096 		ret = ib_agent_port_open(device, i);
3097 		if (ret) {
3098 			dev_err(&device->dev,
3099 				"Couldn't open port %d for agents\n", i);
3100 			goto error_agent;
3101 		}
3102 		count++;
3103 	}
3104 	if (!count)
3105 		return -EOPNOTSUPP;
3106 
3107 	return 0;
3108 
3109 error_agent:
3110 	if (ib_mad_port_close(device, i))
3111 		dev_err(&device->dev, "Couldn't close port %d\n", i);
3112 
3113 error:
3114 	while (--i >= start) {
3115 		if (!rdma_cap_ib_mad(device, i))
3116 			continue;
3117 
3118 		if (ib_agent_port_close(device, i))
3119 			dev_err(&device->dev,
3120 				"Couldn't close port %d for agents\n", i);
3121 		if (ib_mad_port_close(device, i))
3122 			dev_err(&device->dev, "Couldn't close port %d\n", i);
3123 	}
3124 	return ret;
3125 }
3126 
3127 static void ib_mad_remove_device(struct ib_device *device, void *client_data)
3128 {
3129 	unsigned int i;
3130 
3131 	rdma_for_each_port (device, i) {
3132 		if (!rdma_cap_ib_mad(device, i))
3133 			continue;
3134 
3135 		if (ib_agent_port_close(device, i))
3136 			dev_err(&device->dev,
3137 				"Couldn't close port %d for agents\n", i);
3138 		if (ib_mad_port_close(device, i))
3139 			dev_err(&device->dev, "Couldn't close port %d\n", i);
3140 	}
3141 }
3142 
3143 static struct ib_client mad_client = {
3144 	.name   = "mad",
3145 	.add = ib_mad_init_device,
3146 	.remove = ib_mad_remove_device
3147 };
3148 
3149 int ib_mad_init(void)
3150 {
3151 	mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
3152 	mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
3153 
3154 	mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
3155 	mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
3156 
3157 	INIT_LIST_HEAD(&ib_mad_port_list);
3158 
3159 	if (ib_register_client(&mad_client)) {
3160 		pr_err("Couldn't register ib_mad client\n");
3161 		return -EINVAL;
3162 	}
3163 
3164 	return 0;
3165 }
3166 
3167 void ib_mad_cleanup(void)
3168 {
3169 	ib_unregister_client(&mad_client);
3170 }
3171