xref: /openbmc/linux/drivers/infiniband/core/mad.c (revision b85d4594)
1 /*
2  * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
4  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
5  * Copyright (c) 2009 HNR Consulting. All rights reserved.
6  * Copyright (c) 2014 Intel Corporation.  All rights reserved.
7  *
8  * This software is available to you under a choice of one of two
9  * licenses.  You may choose to be licensed under the terms of the GNU
10  * General Public License (GPL) Version 2, available from the file
11  * COPYING in the main directory of this source tree, or the
12  * OpenIB.org BSD license below:
13  *
14  *     Redistribution and use in source and binary forms, with or
15  *     without modification, are permitted provided that the following
16  *     conditions are met:
17  *
18  *      - Redistributions of source code must retain the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer.
21  *
22  *      - Redistributions in binary form must reproduce the above
23  *        copyright notice, this list of conditions and the following
24  *        disclaimer in the documentation and/or other materials
25  *        provided with the distribution.
26  *
27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34  * SOFTWARE.
35  *
36  */
37 
38 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
39 
40 #include <linux/dma-mapping.h>
41 #include <linux/slab.h>
42 #include <linux/module.h>
43 #include <rdma/ib_cache.h>
44 
45 #include "mad_priv.h"
46 #include "mad_rmpp.h"
47 #include "smi.h"
48 #include "opa_smi.h"
49 #include "agent.h"
50 
51 MODULE_LICENSE("Dual BSD/GPL");
52 MODULE_DESCRIPTION("kernel IB MAD API");
53 MODULE_AUTHOR("Hal Rosenstock");
54 MODULE_AUTHOR("Sean Hefty");
55 
56 static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
57 static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
58 
59 module_param_named(send_queue_size, mad_sendq_size, int, 0444);
60 MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
61 module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
62 MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
63 
64 static struct list_head ib_mad_port_list;
65 static u32 ib_mad_client_id;
66 
67 /* Port list lock */
68 static DEFINE_SPINLOCK(ib_mad_port_list_lock);
69 
70 /* Forward declarations */
71 static int method_in_use(struct ib_mad_mgmt_method_table **method,
72 			 struct ib_mad_reg_req *mad_reg_req);
73 static void remove_mad_reg_req(struct ib_mad_agent_private *priv);
74 static struct ib_mad_agent_private *find_mad_agent(
75 					struct ib_mad_port_private *port_priv,
76 					const struct ib_mad_hdr *mad);
77 static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
78 				    struct ib_mad_private *mad);
79 static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
80 static void timeout_sends(struct work_struct *work);
81 static void local_completions(struct work_struct *work);
82 static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
83 			      struct ib_mad_agent_private *agent_priv,
84 			      u8 mgmt_class);
85 static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
86 			   struct ib_mad_agent_private *agent_priv);
87 
88 /*
89  * Returns an ib_mad_port_private structure or NULL for a device/port
90  * Assumes ib_mad_port_list_lock is held
91  */
92 static inline struct ib_mad_port_private *
93 __ib_get_mad_port(struct ib_device *device, int port_num)
94 {
95 	struct ib_mad_port_private *entry;
96 
97 	list_for_each_entry(entry, &ib_mad_port_list, port_list) {
98 		if (entry->device == device && entry->port_num == port_num)
99 			return entry;
100 	}
101 	return NULL;
102 }
103 
104 /*
105  * Wrapper function to return an ib_mad_port_private structure or NULL
106  * for a device/port
107  */
108 static inline struct ib_mad_port_private *
109 ib_get_mad_port(struct ib_device *device, int port_num)
110 {
111 	struct ib_mad_port_private *entry;
112 	unsigned long flags;
113 
114 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
115 	entry = __ib_get_mad_port(device, port_num);
116 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
117 
118 	return entry;
119 }
120 
121 static inline u8 convert_mgmt_class(u8 mgmt_class)
122 {
123 	/* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
124 	return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ?
125 		0 : mgmt_class;
126 }
127 
128 static int get_spl_qp_index(enum ib_qp_type qp_type)
129 {
130 	switch (qp_type) {
132 	case IB_QPT_SMI:
133 		return 0;
134 	case IB_QPT_GSI:
135 		return 1;
136 	default:
137 		return -1;
138 	}
139 }
140 
141 static int vendor_class_index(u8 mgmt_class)
142 {
143 	return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START;
144 }
145 
146 static int is_vendor_class(u8 mgmt_class)
147 {
148 	if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) ||
149 	    (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END))
150 		return 0;
151 	return 1;
152 }
153 
154 static int is_vendor_oui(char *oui)
155 {
156 	if (oui[0] || oui[1] || oui[2])
157 		return 1;
158 	return 0;
159 }
160 
161 static int is_vendor_method_in_use(
162 		struct ib_mad_mgmt_vendor_class *vendor_class,
163 		struct ib_mad_reg_req *mad_reg_req)
164 {
165 	struct ib_mad_mgmt_method_table *method;
166 	int i;
167 
168 	for (i = 0; i < MAX_MGMT_OUI; i++) {
169 		if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) {
170 			method = vendor_class->method_table[i];
171 			if (method) {
172 				if (method_in_use(&method, mad_reg_req))
173 					return 1;
174 				else
175 					break;
176 			}
177 		}
178 	}
179 	return 0;
180 }
181 
182 int ib_response_mad(const struct ib_mad_hdr *hdr)
183 {
184 	return ((hdr->method & IB_MGMT_METHOD_RESP) ||
185 		(hdr->method == IB_MGMT_METHOD_TRAP_REPRESS) ||
186 		((hdr->mgmt_class == IB_MGMT_CLASS_BM) &&
187 		 (hdr->attr_mod & IB_BM_ATTR_MOD_RESP)));
188 }
189 EXPORT_SYMBOL(ib_response_mad);
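
/*
 * Illustrative sketch (not part of the original file): a client receive
 * handler can use ib_response_mad() to separate solicited responses from
 * unsolicited requests.  The handler name is hypothetical; the two-argument
 * recv_handler signature matches how this file invokes it.
 */
static void example_recv_handler(struct ib_mad_agent *mad_agent,
				 struct ib_mad_recv_wc *mad_recv_wc)
{
	const struct ib_mad_hdr *hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;

	if (ib_response_mad(hdr))
		pr_debug("response MAD, method 0x%x\n", hdr->method);
	else
		pr_debug("request MAD, method 0x%x\n", hdr->method);

	/* the client owns the receive buffers and must return them */
	ib_free_recv_mad(mad_recv_wc);
}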
190 
191 /*
192  * ib_register_mad_agent - Register to send/receive MADs
193  */
194 struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
195 					   u8 port_num,
196 					   enum ib_qp_type qp_type,
197 					   struct ib_mad_reg_req *mad_reg_req,
198 					   u8 rmpp_version,
199 					   ib_mad_send_handler send_handler,
200 					   ib_mad_recv_handler recv_handler,
201 					   void *context,
202 					   u32 registration_flags)
203 {
204 	struct ib_mad_port_private *port_priv;
205 	struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
206 	struct ib_mad_agent_private *mad_agent_priv;
207 	struct ib_mad_reg_req *reg_req = NULL;
208 	struct ib_mad_mgmt_class_table *class;
209 	struct ib_mad_mgmt_vendor_class_table *vendor;
210 	struct ib_mad_mgmt_vendor_class *vendor_class;
211 	struct ib_mad_mgmt_method_table *method;
212 	int ret2, qpn;
213 	unsigned long flags;
214 	u8 mgmt_class, vclass;
215 
216 	/* Validate parameters */
217 	qpn = get_spl_qp_index(qp_type);
218 	if (qpn == -1) {
219 		dev_notice(&device->dev,
220 			   "ib_register_mad_agent: invalid QP Type %d\n",
221 			   qp_type);
222 		goto error1;
223 	}
224 
225 	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
226 		dev_notice(&device->dev,
227 			   "ib_register_mad_agent: invalid RMPP Version %u\n",
228 			   rmpp_version);
229 		goto error1;
230 	}
231 
232 	/* Validate MAD registration request if supplied */
233 	if (mad_reg_req) {
234 		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
235 			dev_notice(&device->dev,
236 				   "ib_register_mad_agent: invalid Class Version %u\n",
237 				   mad_reg_req->mgmt_class_version);
238 			goto error1;
239 		}
240 		if (!recv_handler) {
241 			dev_notice(&device->dev,
242 				   "ib_register_mad_agent: no recv_handler\n");
243 			goto error1;
244 		}
245 		if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
246 			/*
247 			 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
248 			 * one in this range currently allowed
249 			 */
250 			if (mad_reg_req->mgmt_class !=
251 			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
252 				dev_notice(&device->dev,
253 					   "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n",
254 					   mad_reg_req->mgmt_class);
255 				goto error1;
256 			}
257 		} else if (mad_reg_req->mgmt_class == 0) {
258 			/*
259 			 * Class 0 is reserved in IBA and is used for
260 			 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
261 			 */
262 			dev_notice(&device->dev,
263 				   "ib_register_mad_agent: Invalid Mgmt Class 0\n");
264 			goto error1;
265 		} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
266 			/*
267 			 * If class is in "new" vendor range,
268 			 * ensure supplied OUI is not zero
269 			 */
270 			if (!is_vendor_oui(mad_reg_req->oui)) {
271 				dev_notice(&device->dev,
272 					   "ib_register_mad_agent: No OUI specified for class 0x%x\n",
273 					   mad_reg_req->mgmt_class);
274 				goto error1;
275 			}
276 		}
277 		/* Make sure class supplied is consistent with RMPP */
278 		if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
279 			if (rmpp_version) {
280 				dev_notice(&device->dev,
281 					   "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n",
282 					   mad_reg_req->mgmt_class);
283 				goto error1;
284 			}
285 		}
286 
287 		/* Make sure class supplied is consistent with QP type */
288 		if (qp_type == IB_QPT_SMI) {
289 			if ((mad_reg_req->mgmt_class !=
290 					IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
291 			    (mad_reg_req->mgmt_class !=
292 					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
293 				dev_notice(&device->dev,
294 					   "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n",
295 					   mad_reg_req->mgmt_class);
296 				goto error1;
297 			}
298 		} else {
299 			if ((mad_reg_req->mgmt_class ==
300 					IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
301 			    (mad_reg_req->mgmt_class ==
302 					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
303 				dev_notice(&device->dev,
304 					   "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n",
305 					   mad_reg_req->mgmt_class);
306 				goto error1;
307 			}
308 		}
309 	} else {
310 		/* No registration request supplied */
311 		if (!send_handler)
312 			goto error1;
313 		if (registration_flags & IB_MAD_USER_RMPP)
314 			goto error1;
315 	}
316 
317 	/* Validate device and port */
318 	port_priv = ib_get_mad_port(device, port_num);
319 	if (!port_priv) {
320 		dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n");
321 		ret = ERR_PTR(-ENODEV);
322 		goto error1;
323 	}
324 
325 	/* Verify the QP requested is supported.  For example, Ethernet devices
326 	 * will not have QP0 */
327 	if (!port_priv->qp_info[qpn].qp) {
328 		dev_notice(&device->dev,
329 			   "ib_register_mad_agent: QP %d not supported\n", qpn);
330 		ret = ERR_PTR(-EPROTONOSUPPORT);
331 		goto error1;
332 	}
333 
334 	/* Allocate structures */
335 	mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
336 	if (!mad_agent_priv) {
337 		ret = ERR_PTR(-ENOMEM);
338 		goto error1;
339 	}
340 
341 	if (mad_reg_req) {
342 		reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL);
343 		if (!reg_req) {
344 			ret = ERR_PTR(-ENOMEM);
345 			goto error3;
346 		}
347 	}
348 
349 	/* Now, fill in the various structures */
350 	mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
351 	mad_agent_priv->reg_req = reg_req;
352 	mad_agent_priv->agent.rmpp_version = rmpp_version;
353 	mad_agent_priv->agent.device = device;
354 	mad_agent_priv->agent.recv_handler = recv_handler;
355 	mad_agent_priv->agent.send_handler = send_handler;
356 	mad_agent_priv->agent.context = context;
357 	mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
358 	mad_agent_priv->agent.port_num = port_num;
359 	mad_agent_priv->agent.flags = registration_flags;
360 	spin_lock_init(&mad_agent_priv->lock);
361 	INIT_LIST_HEAD(&mad_agent_priv->send_list);
362 	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
363 	INIT_LIST_HEAD(&mad_agent_priv->done_list);
364 	INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
365 	INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
366 	INIT_LIST_HEAD(&mad_agent_priv->local_list);
367 	INIT_WORK(&mad_agent_priv->local_work, local_completions);
368 	atomic_set(&mad_agent_priv->refcount, 1);
369 	init_completion(&mad_agent_priv->comp);
370 
371 	spin_lock_irqsave(&port_priv->reg_lock, flags);
372 	mad_agent_priv->agent.hi_tid = ++ib_mad_client_id;
373 
374 	/*
375 	 * Make sure MAD registration (if supplied)
376 	 * does not overlap with any existing ones
377 	 */
378 	if (mad_reg_req) {
379 		mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
380 		if (!is_vendor_class(mgmt_class)) {
381 			class = port_priv->version[mad_reg_req->
382 						   mgmt_class_version].class;
383 			if (class) {
384 				method = class->method_table[mgmt_class];
385 				if (method) {
386 					if (method_in_use(&method,
387 							   mad_reg_req))
388 						goto error4;
389 				}
390 			}
391 			ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
392 						  mgmt_class);
393 		} else {
394 			/* "New" vendor class range */
395 			vendor = port_priv->version[mad_reg_req->
396 						    mgmt_class_version].vendor;
397 			if (vendor) {
398 				vclass = vendor_class_index(mgmt_class);
399 				vendor_class = vendor->vendor_class[vclass];
400 				if (vendor_class) {
401 					if (is_vendor_method_in_use(
402 							vendor_class,
403 							mad_reg_req))
404 						goto error4;
405 				}
406 			}
407 			ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
408 		}
409 		if (ret2) {
410 			ret = ERR_PTR(ret2);
411 			goto error4;
412 		}
413 	}
414 
415 	/* Add mad agent into port's agent list */
416 	list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list);
417 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
418 
419 	return &mad_agent_priv->agent;
420 
421 error4:
422 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
423 	kfree(reg_req);
424 error3:
425 	kfree(mad_agent_priv);
426 error1:
427 	return ret;
428 }
429 EXPORT_SYMBOL(ib_register_mad_agent);
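
/*
 * Illustrative sketch (not part of the original file): registering a GSI
 * agent that receives unsolicited PerformanceManagement Get/Set requests and
 * sends its own MADs.  "example_send_done" is a hypothetical client callback;
 * the receive callback reuses the example_recv_handler sketch shown earlier.
 * The agent is torn down again with ib_unregister_mad_agent().
 */
static void example_send_done(struct ib_mad_agent *mad_agent,
			      struct ib_mad_send_wc *mad_send_wc)
{
	ib_free_send_mad(mad_send_wc->send_buf);
}

static struct ib_mad_agent *example_register(struct ib_device *device,
					     u8 port_num)
{
	struct ib_mad_reg_req reg_req = {
		.mgmt_class = IB_MGMT_CLASS_PERF_MGMT,
		.mgmt_class_version = 1,
	};

	set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
	set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);

	return ib_register_mad_agent(device, port_num, IB_QPT_GSI, &reg_req,
				     0 /* no RMPP */, example_send_done,
				     example_recv_handler, NULL, 0);
}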
430 
431 static inline int is_snooping_sends(int mad_snoop_flags)
432 {
433 	return (mad_snoop_flags &
434 		(/*IB_MAD_SNOOP_POSTED_SENDS |
435 		 IB_MAD_SNOOP_RMPP_SENDS |*/
436 		 IB_MAD_SNOOP_SEND_COMPLETIONS /*|
437 		 IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/));
438 }
439 
440 static inline int is_snooping_recvs(int mad_snoop_flags)
441 {
442 	return (mad_snoop_flags &
443 		(IB_MAD_SNOOP_RECVS /*|
444 		 IB_MAD_SNOOP_RMPP_RECVS*/));
445 }
446 
447 static int register_snoop_agent(struct ib_mad_qp_info *qp_info,
448 				struct ib_mad_snoop_private *mad_snoop_priv)
449 {
450 	struct ib_mad_snoop_private **new_snoop_table;
451 	unsigned long flags;
452 	int i;
453 
454 	spin_lock_irqsave(&qp_info->snoop_lock, flags);
455 	/* Check for empty slot in array. */
456 	for (i = 0; i < qp_info->snoop_table_size; i++)
457 		if (!qp_info->snoop_table[i])
458 			break;
459 
460 	if (i == qp_info->snoop_table_size) {
461 		/* Grow table. */
462 		new_snoop_table = krealloc(qp_info->snoop_table,
463 					   sizeof mad_snoop_priv *
464 					   (qp_info->snoop_table_size + 1),
465 					   GFP_ATOMIC);
466 		if (!new_snoop_table) {
467 			i = -ENOMEM;
468 			goto out;
469 		}
470 
471 		qp_info->snoop_table = new_snoop_table;
472 		qp_info->snoop_table_size++;
473 	}
474 	qp_info->snoop_table[i] = mad_snoop_priv;
475 	atomic_inc(&qp_info->snoop_count);
476 out:
477 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
478 	return i;
479 }
480 
481 struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
482 					   u8 port_num,
483 					   enum ib_qp_type qp_type,
484 					   int mad_snoop_flags,
485 					   ib_mad_snoop_handler snoop_handler,
486 					   ib_mad_recv_handler recv_handler,
487 					   void *context)
488 {
489 	struct ib_mad_port_private *port_priv;
490 	struct ib_mad_agent *ret;
491 	struct ib_mad_snoop_private *mad_snoop_priv;
492 	int qpn;
493 
494 	/* Validate parameters */
495 	if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
496 	    (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) {
497 		ret = ERR_PTR(-EINVAL);
498 		goto error1;
499 	}
500 	qpn = get_spl_qp_index(qp_type);
501 	if (qpn == -1) {
502 		ret = ERR_PTR(-EINVAL);
503 		goto error1;
504 	}
505 	port_priv = ib_get_mad_port(device, port_num);
506 	if (!port_priv) {
507 		ret = ERR_PTR(-ENODEV);
508 		goto error1;
509 	}
510 	/* Allocate structures */
511 	mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
512 	if (!mad_snoop_priv) {
513 		ret = ERR_PTR(-ENOMEM);
514 		goto error1;
515 	}
516 
517 	/* Now, fill in the various structures */
518 	mad_snoop_priv->qp_info = &port_priv->qp_info[qpn];
519 	mad_snoop_priv->agent.device = device;
520 	mad_snoop_priv->agent.recv_handler = recv_handler;
521 	mad_snoop_priv->agent.snoop_handler = snoop_handler;
522 	mad_snoop_priv->agent.context = context;
523 	mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp;
524 	mad_snoop_priv->agent.port_num = port_num;
525 	mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
526 	init_completion(&mad_snoop_priv->comp);
527 	mad_snoop_priv->snoop_index = register_snoop_agent(
528 						&port_priv->qp_info[qpn],
529 						mad_snoop_priv);
530 	if (mad_snoop_priv->snoop_index < 0) {
531 		ret = ERR_PTR(mad_snoop_priv->snoop_index);
532 		goto error2;
533 	}
534 
535 	atomic_set(&mad_snoop_priv->refcount, 1);
536 	return &mad_snoop_priv->agent;
537 
538 error2:
539 	kfree(mad_snoop_priv);
540 error1:
541 	return ret;
542 }
543 EXPORT_SYMBOL(ib_register_mad_snoop);
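
/*
 * Illustrative sketch (not part of the original file): a diagnostic client
 * that only wants to observe GSI traffic on a port can attach a snoop agent
 * instead of a regular one.  The snoop and receive handlers are hypothetical
 * client callbacks supplied by the caller.
 */
static struct ib_mad_agent *example_snoop(struct ib_device *device,
					  u8 port_num,
					  ib_mad_snoop_handler snoop_handler,
					  ib_mad_recv_handler recv_handler)
{
	return ib_register_mad_snoop(device, port_num, IB_QPT_GSI,
				     IB_MAD_SNOOP_SEND_COMPLETIONS |
				     IB_MAD_SNOOP_RECVS,
				     snoop_handler, recv_handler, NULL);
}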
544 
545 static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
546 {
547 	if (atomic_dec_and_test(&mad_agent_priv->refcount))
548 		complete(&mad_agent_priv->comp);
549 }
550 
551 static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv)
552 {
553 	if (atomic_dec_and_test(&mad_snoop_priv->refcount))
554 		complete(&mad_snoop_priv->comp);
555 }
556 
557 static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
558 {
559 	struct ib_mad_port_private *port_priv;
560 	unsigned long flags;
561 
562 	/* Note that we could still be handling received MADs */
563 
564 	/*
565 	 * Canceling all sends results in dropping received response
566 	 * MADs, preventing us from queuing additional work
567 	 */
568 	cancel_mads(mad_agent_priv);
569 	port_priv = mad_agent_priv->qp_info->port_priv;
570 	cancel_delayed_work(&mad_agent_priv->timed_work);
571 
572 	spin_lock_irqsave(&port_priv->reg_lock, flags);
573 	remove_mad_reg_req(mad_agent_priv);
574 	list_del(&mad_agent_priv->agent_list);
575 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
576 
577 	flush_workqueue(port_priv->wq);
578 	ib_cancel_rmpp_recvs(mad_agent_priv);
579 
580 	deref_mad_agent(mad_agent_priv);
581 	wait_for_completion(&mad_agent_priv->comp);
582 
583 	kfree(mad_agent_priv->reg_req);
584 	kfree(mad_agent_priv);
585 }
586 
587 static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
588 {
589 	struct ib_mad_qp_info *qp_info;
590 	unsigned long flags;
591 
592 	qp_info = mad_snoop_priv->qp_info;
593 	spin_lock_irqsave(&qp_info->snoop_lock, flags);
594 	qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL;
595 	atomic_dec(&qp_info->snoop_count);
596 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
597 
598 	deref_snoop_agent(mad_snoop_priv);
599 	wait_for_completion(&mad_snoop_priv->comp);
600 
601 	kfree(mad_snoop_priv);
602 }
603 
604 /*
605  * ib_unregister_mad_agent - Unregisters a client from using MAD services
606  */
607 int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
608 {
609 	struct ib_mad_agent_private *mad_agent_priv;
610 	struct ib_mad_snoop_private *mad_snoop_priv;
611 
612 	/* If the TID is zero, the agent can only snoop. */
613 	if (mad_agent->hi_tid) {
614 		mad_agent_priv = container_of(mad_agent,
615 					      struct ib_mad_agent_private,
616 					      agent);
617 		unregister_mad_agent(mad_agent_priv);
618 	} else {
619 		mad_snoop_priv = container_of(mad_agent,
620 					      struct ib_mad_snoop_private,
621 					      agent);
622 		unregister_mad_snoop(mad_snoop_priv);
623 	}
624 	return 0;
625 }
626 EXPORT_SYMBOL(ib_unregister_mad_agent);
627 
628 static void dequeue_mad(struct ib_mad_list_head *mad_list)
629 {
630 	struct ib_mad_queue *mad_queue;
631 	unsigned long flags;
632 
633 	BUG_ON(!mad_list->mad_queue);
634 	mad_queue = mad_list->mad_queue;
635 	spin_lock_irqsave(&mad_queue->lock, flags);
636 	list_del(&mad_list->list);
637 	mad_queue->count--;
638 	spin_unlock_irqrestore(&mad_queue->lock, flags);
639 }
640 
641 static void snoop_send(struct ib_mad_qp_info *qp_info,
642 		       struct ib_mad_send_buf *send_buf,
643 		       struct ib_mad_send_wc *mad_send_wc,
644 		       int mad_snoop_flags)
645 {
646 	struct ib_mad_snoop_private *mad_snoop_priv;
647 	unsigned long flags;
648 	int i;
649 
650 	spin_lock_irqsave(&qp_info->snoop_lock, flags);
651 	for (i = 0; i < qp_info->snoop_table_size; i++) {
652 		mad_snoop_priv = qp_info->snoop_table[i];
653 		if (!mad_snoop_priv ||
654 		    !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
655 			continue;
656 
657 		atomic_inc(&mad_snoop_priv->refcount);
658 		spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
659 		mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
660 						    send_buf, mad_send_wc);
661 		deref_snoop_agent(mad_snoop_priv);
662 		spin_lock_irqsave(&qp_info->snoop_lock, flags);
663 	}
664 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
665 }
666 
667 static void snoop_recv(struct ib_mad_qp_info *qp_info,
668 		       struct ib_mad_recv_wc *mad_recv_wc,
669 		       int mad_snoop_flags)
670 {
671 	struct ib_mad_snoop_private *mad_snoop_priv;
672 	unsigned long flags;
673 	int i;
674 
675 	spin_lock_irqsave(&qp_info->snoop_lock, flags);
676 	for (i = 0; i < qp_info->snoop_table_size; i++) {
677 		mad_snoop_priv = qp_info->snoop_table[i];
678 		if (!mad_snoop_priv ||
679 		    !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
680 			continue;
681 
682 		atomic_inc(&mad_snoop_priv->refcount);
683 		spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
684 		mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent,
685 						   mad_recv_wc);
686 		deref_snoop_agent(mad_snoop_priv);
687 		spin_lock_irqsave(&qp_info->snoop_lock, flags);
688 	}
689 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
690 }
691 
692 static void build_smp_wc(struct ib_qp *qp,
693 			 u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
694 			 struct ib_wc *wc)
695 {
696 	memset(wc, 0, sizeof *wc);
697 	wc->wr_id = wr_id;
698 	wc->status = IB_WC_SUCCESS;
699 	wc->opcode = IB_WC_RECV;
700 	wc->pkey_index = pkey_index;
701 	wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
702 	wc->src_qp = IB_QP0;
703 	wc->qp = qp;
704 	wc->slid = slid;
705 	wc->sl = 0;
706 	wc->dlid_path_bits = 0;
707 	wc->port_num = port_num;
708 }
709 
710 static size_t mad_priv_size(const struct ib_mad_private *mp)
711 {
712 	return sizeof(struct ib_mad_private) + mp->mad_size;
713 }
714 
715 static struct ib_mad_private *alloc_mad_private(size_t mad_size, gfp_t flags)
716 {
717 	size_t size = sizeof(struct ib_mad_private) + mad_size;
718 	struct ib_mad_private *ret = kzalloc(size, flags);
719 
720 	if (ret)
721 		ret->mad_size = mad_size;
722 
723 	return ret;
724 }
725 
726 static size_t port_mad_size(const struct ib_mad_port_private *port_priv)
727 {
728 	return rdma_max_mad_size(port_priv->device, port_priv->port_num);
729 }
730 
731 static size_t mad_priv_dma_size(const struct ib_mad_private *mp)
732 {
733 	return sizeof(struct ib_grh) + mp->mad_size;
734 }
735 
736 /*
737  * Return 0 if SMP is to be sent
738  * Return 1 if SMP was consumed locally (whether or not solicited)
739  * Return < 0 if error
740  */
741 static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
742 				  struct ib_mad_send_wr_private *mad_send_wr)
743 {
744 	int ret = 0;
745 	struct ib_smp *smp = mad_send_wr->send_buf.mad;
746 	struct opa_smp *opa_smp = (struct opa_smp *)smp;
747 	unsigned long flags;
748 	struct ib_mad_local_private *local;
749 	struct ib_mad_private *mad_priv;
750 	struct ib_mad_port_private *port_priv;
751 	struct ib_mad_agent_private *recv_mad_agent = NULL;
752 	struct ib_device *device = mad_agent_priv->agent.device;
753 	u8 port_num;
754 	struct ib_wc mad_wc;
755 	struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
756 	size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv);
757 	u16 out_mad_pkey_index = 0;
758 	u16 drslid;
759 	bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device,
760 				    mad_agent_priv->qp_info->port_priv->port_num);
761 
762 	if (rdma_cap_ib_switch(device) &&
763 	    smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
764 		port_num = send_wr->wr.ud.port_num;
765 	else
766 		port_num = mad_agent_priv->agent.port_num;
767 
768 	/*
769 	 * Directed route handling starts if the initial LID routed part of
770 	 * a request or the ending LID routed part of a response is empty.
771 	 * If we are at the start of the LID routed part, don't update the
772 	 * hop_ptr or hop_cnt.  See section 14.2.2, Vol 1 IB spec.
773 	 */
774 	if (opa && smp->class_version == OPA_SMP_CLASS_VERSION) {
775 		u32 opa_drslid;
776 
777 		if ((opa_get_smp_direction(opa_smp)
778 		     ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) ==
779 		     OPA_LID_PERMISSIVE &&
780 		     opa_smi_handle_dr_smp_send(opa_smp,
781 						rdma_cap_ib_switch(device),
782 						port_num) == IB_SMI_DISCARD) {
783 			ret = -EINVAL;
784 			dev_err(&device->dev, "OPA Invalid directed route\n");
785 			goto out;
786 		}
787 		opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid);
788 		if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) &&
789 		    opa_drslid & 0xffff0000) {
790 			ret = -EINVAL;
791 			dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n",
792 			       opa_drslid);
793 			goto out;
794 		}
795 		drslid = (u16)(opa_drslid & 0x0000ffff);
796 
797 		/* Check to post send on QP or process locally */
798 		if (opa_smi_check_local_smp(opa_smp, device) == IB_SMI_DISCARD &&
799 		    opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD)
800 			goto out;
801 	} else {
802 		if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
803 		     IB_LID_PERMISSIVE &&
804 		     smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) ==
805 		     IB_SMI_DISCARD) {
806 			ret = -EINVAL;
807 			dev_err(&device->dev, "Invalid directed route\n");
808 			goto out;
809 		}
810 		drslid = be16_to_cpu(smp->dr_slid);
811 
812 		/* Check to post send on QP or process locally */
813 		if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD &&
814 		    smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD)
815 			goto out;
816 	}
817 
818 	local = kmalloc(sizeof *local, GFP_ATOMIC);
819 	if (!local) {
820 		ret = -ENOMEM;
821 		dev_err(&device->dev, "No memory for ib_mad_local_private\n");
822 		goto out;
823 	}
824 	local->mad_priv = NULL;
825 	local->recv_mad_agent = NULL;
826 	mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC);
827 	if (!mad_priv) {
828 		ret = -ENOMEM;
829 		dev_err(&device->dev, "No memory for local response MAD\n");
830 		kfree(local);
831 		goto out;
832 	}
833 
834 	build_smp_wc(mad_agent_priv->agent.qp,
835 		     send_wr->wr_id, drslid,
836 		     send_wr->wr.ud.pkey_index,
837 		     send_wr->wr.ud.port_num, &mad_wc);
838 
839 	if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) {
840 		mad_wc.byte_len = mad_send_wr->send_buf.hdr_len
841 					+ mad_send_wr->send_buf.data_len
842 					+ sizeof(struct ib_grh);
843 	}
844 
845 	/* No GRH for DR SMP */
846 	ret = device->process_mad(device, 0, port_num, &mad_wc, NULL,
847 				  (const struct ib_mad_hdr *)smp, mad_size,
848 				  (struct ib_mad_hdr *)mad_priv->mad,
849 				  &mad_size, &out_mad_pkey_index);
850 	switch (ret) {
852 	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
853 		if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) &&
854 		    mad_agent_priv->agent.recv_handler) {
855 			local->mad_priv = mad_priv;
856 			local->recv_mad_agent = mad_agent_priv;
857 			/*
858 			 * Reference MAD agent until receive
859 			 * side of local completion handled
860 			 */
861 			atomic_inc(&mad_agent_priv->refcount);
862 		} else
863 			kfree(mad_priv);
864 		break;
865 	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED:
866 		kfree(mad_priv);
867 		break;
868 	case IB_MAD_RESULT_SUCCESS:
869 		/* Treat like an incoming receive MAD */
870 		port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
871 					    mad_agent_priv->agent.port_num);
872 		if (port_priv) {
873 			memcpy(mad_priv->mad, smp, mad_priv->mad_size);
874 			recv_mad_agent = find_mad_agent(port_priv,
875 						        (const struct ib_mad_hdr *)mad_priv->mad);
876 		}
877 		if (!port_priv || !recv_mad_agent) {
878 			/*
879 			 * No receiving agent so drop packet and
880 			 * generate send completion.
881 			 */
882 			kfree(mad_priv);
883 			break;
884 		}
885 		local->mad_priv = mad_priv;
886 		local->recv_mad_agent = recv_mad_agent;
887 		break;
888 	default:
889 		kfree(mad_priv);
890 		kfree(local);
891 		ret = -EINVAL;
892 		goto out;
893 	}
894 
895 	local->mad_send_wr = mad_send_wr;
896 	if (opa) {
897 		local->mad_send_wr->send_wr.wr.ud.pkey_index = out_mad_pkey_index;
898 		local->return_wc_byte_len = mad_size;
899 	}
900 	/* Reference MAD agent until send side of local completion handled */
901 	atomic_inc(&mad_agent_priv->refcount);
902 	/* Queue local completion to local list */
903 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
904 	list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
905 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
906 	queue_work(mad_agent_priv->qp_info->port_priv->wq,
907 		   &mad_agent_priv->local_work);
908 	ret = 1;
909 out:
910 	return ret;
911 }
912 
913 static int get_pad_size(int hdr_len, int data_len, size_t mad_size)
914 {
915 	int seg_size, pad;
916 
917 	seg_size = mad_size - hdr_len;
918 	if (data_len && seg_size) {
919 		pad = seg_size - data_len % seg_size;
920 		return pad == seg_size ? 0 : pad;
921 	} else
922 		return seg_size;
923 }
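
/*
 * Worked example (illustrative): for an IB SA MAD, mad_size is
 * sizeof(struct ib_mad) = 256 and hdr_len is IB_MGMT_SA_HDR = 56, giving a
 * segment size of 200.  A data_len of 500 therefore needs
 * pad = 200 - (500 % 200) = 100 bytes so that the last RMPP segment carries
 * a full 200-byte payload, while a data_len that is an exact multiple of 200
 * needs no padding at all.
 */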
924 
925 static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
926 {
927 	struct ib_rmpp_segment *s, *t;
928 
929 	list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
930 		list_del(&s->list);
931 		kfree(s);
932 	}
933 }
934 
935 static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
936 				size_t mad_size, gfp_t gfp_mask)
937 {
938 	struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
939 	struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
940 	struct ib_rmpp_segment *seg = NULL;
941 	int left, seg_size, pad;
942 
943 	send_buf->seg_size = mad_size - send_buf->hdr_len;
944 	send_buf->seg_rmpp_size = mad_size - IB_MGMT_RMPP_HDR;
945 	seg_size = send_buf->seg_size;
946 	pad = send_wr->pad;
947 
948 	/* Allocate data segments. */
949 	for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
950 		seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
951 		if (!seg) {
952 			dev_err(&send_buf->mad_agent->device->dev,
953 				"alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n",
954 				sizeof (*seg) + seg_size, gfp_mask);
955 			free_send_rmpp_list(send_wr);
956 			return -ENOMEM;
957 		}
958 		seg->num = ++send_buf->seg_count;
959 		list_add_tail(&seg->list, &send_wr->rmpp_list);
960 	}
961 
962 	/* Zero any padding */
963 	if (pad)
964 		memset(seg->data + seg_size - pad, 0, pad);
965 
966 	rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv->
967 					  agent.rmpp_version;
968 	rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
969 	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
970 
971 	send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
972 					struct ib_rmpp_segment, list);
973 	send_wr->last_ack_seg = send_wr->cur_seg;
974 	return 0;
975 }
976 
977 int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent)
978 {
979 	return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP);
980 }
981 EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
982 
983 struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent,
984 					    u32 remote_qpn, u16 pkey_index,
985 					    int rmpp_active,
986 					    int hdr_len, int data_len,
987 					    gfp_t gfp_mask,
988 					    u8 base_version)
989 {
990 	struct ib_mad_agent_private *mad_agent_priv;
991 	struct ib_mad_send_wr_private *mad_send_wr;
992 	int pad, message_size, ret, size;
993 	void *buf;
994 	size_t mad_size;
995 	bool opa;
996 
997 	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
998 				      agent);
999 
1000 	opa = rdma_cap_opa_mad(mad_agent->device, mad_agent->port_num);
1001 
1002 	if (opa && base_version == OPA_MGMT_BASE_VERSION)
1003 		mad_size = sizeof(struct opa_mad);
1004 	else
1005 		mad_size = sizeof(struct ib_mad);
1006 
1007 	pad = get_pad_size(hdr_len, data_len, mad_size);
1008 	message_size = hdr_len + data_len + pad;
1009 
1010 	if (ib_mad_kernel_rmpp_agent(mad_agent)) {
1011 		if (!rmpp_active && message_size > mad_size)
1012 			return ERR_PTR(-EINVAL);
1013 	} else
1014 		if (rmpp_active || message_size > mad_size)
1015 			return ERR_PTR(-EINVAL);
1016 
1017 	size = rmpp_active ? hdr_len : mad_size;
1018 	buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
1019 	if (!buf)
1020 		return ERR_PTR(-ENOMEM);
1021 
1022 	mad_send_wr = buf + size;
1023 	INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
1024 	mad_send_wr->send_buf.mad = buf;
1025 	mad_send_wr->send_buf.hdr_len = hdr_len;
1026 	mad_send_wr->send_buf.data_len = data_len;
1027 	mad_send_wr->pad = pad;
1028 
1029 	mad_send_wr->mad_agent_priv = mad_agent_priv;
1030 	mad_send_wr->sg_list[0].length = hdr_len;
1031 	mad_send_wr->sg_list[0].lkey = mad_agent->qp->pd->local_dma_lkey;
1032 
1033 	/* OPA MADs don't have to be the full 2048 bytes */
1034 	if (opa && base_version == OPA_MGMT_BASE_VERSION &&
1035 	    data_len < mad_size - hdr_len)
1036 		mad_send_wr->sg_list[1].length = data_len;
1037 	else
1038 		mad_send_wr->sg_list[1].length = mad_size - hdr_len;
1039 
1040 	mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
1041 
1042 	mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
1043 	mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
1044 	mad_send_wr->send_wr.num_sge = 2;
1045 	mad_send_wr->send_wr.opcode = IB_WR_SEND;
1046 	mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
1047 	mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
1048 	mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
1049 	mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
1050 
1051 	if (rmpp_active) {
1052 		ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask);
1053 		if (ret) {
1054 			kfree(buf);
1055 			return ERR_PTR(ret);
1056 		}
1057 	}
1058 
1059 	mad_send_wr->send_buf.mad_agent = mad_agent;
1060 	atomic_inc(&mad_agent_priv->refcount);
1061 	return &mad_send_wr->send_buf;
1062 }
1063 EXPORT_SYMBOL(ib_create_send_mad);
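
/*
 * Illustrative sketch (not part of the original file): building and posting
 * a single, non-RMPP IB MAD.  "ah" is an address handle the caller has
 * already created for the destination; filling in the MAD header and payload
 * is elided.
 */
static int example_send(struct ib_mad_agent *mad_agent, struct ib_ah *ah,
			u32 remote_qpn, u16 pkey_index)
{
	struct ib_mad_send_buf *msg;
	int ret;

	msg = ib_create_send_mad(mad_agent, remote_qpn, pkey_index,
				 0 /* not RMPP */, IB_MGMT_MAD_HDR,
				 IB_MGMT_MAD_DATA, GFP_KERNEL,
				 IB_MGMT_BASE_VERSION);
	if (IS_ERR(msg))
		return PTR_ERR(msg);

	/* msg->mad points at a zeroed MAD; fill in header and payload here */
	msg->ah = ah;
	msg->timeout_ms = 100;
	msg->retries = 3;

	ret = ib_post_send_mad(msg, NULL);
	if (ret)
		ib_free_send_mad(msg);
	/* on success the buffer is typically freed from the send handler */
	return ret;
}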
1064 
1065 int ib_get_mad_data_offset(u8 mgmt_class)
1066 {
1067 	if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
1068 		return IB_MGMT_SA_HDR;
1069 	else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
1070 		 (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
1071 		 (mgmt_class == IB_MGMT_CLASS_BIS))
1072 		return IB_MGMT_DEVICE_HDR;
1073 	else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
1074 		 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
1075 		return IB_MGMT_VENDOR_HDR;
1076 	else
1077 		return IB_MGMT_MAD_HDR;
1078 }
1079 EXPORT_SYMBOL(ib_get_mad_data_offset);
1080 
1081 int ib_is_mad_class_rmpp(u8 mgmt_class)
1082 {
1083 	if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) ||
1084 	    (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
1085 	    (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
1086 	    (mgmt_class == IB_MGMT_CLASS_BIS) ||
1087 	    ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
1088 	     (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)))
1089 		return 1;
1090 	return 0;
1091 }
1092 EXPORT_SYMBOL(ib_is_mad_class_rmpp);
1093 
1094 void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
1095 {
1096 	struct ib_mad_send_wr_private *mad_send_wr;
1097 	struct list_head *list;
1098 
1099 	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
1100 				   send_buf);
1101 	list = &mad_send_wr->cur_seg->list;
1102 
1103 	if (mad_send_wr->cur_seg->num < seg_num) {
1104 		list_for_each_entry(mad_send_wr->cur_seg, list, list)
1105 			if (mad_send_wr->cur_seg->num == seg_num)
1106 				break;
1107 	} else if (mad_send_wr->cur_seg->num > seg_num) {
1108 		list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
1109 			if (mad_send_wr->cur_seg->num == seg_num)
1110 				break;
1111 	}
1112 	return mad_send_wr->cur_seg->data;
1113 }
1114 EXPORT_SYMBOL(ib_get_rmpp_segment);
1115 
1116 static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
1117 {
1118 	if (mad_send_wr->send_buf.seg_count)
1119 		return ib_get_rmpp_segment(&mad_send_wr->send_buf,
1120 					   mad_send_wr->seg_num);
1121 	else
1122 		return mad_send_wr->send_buf.mad +
1123 		       mad_send_wr->send_buf.hdr_len;
1124 }
1125 
1126 void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
1127 {
1128 	struct ib_mad_agent_private *mad_agent_priv;
1129 	struct ib_mad_send_wr_private *mad_send_wr;
1130 
1131 	mad_agent_priv = container_of(send_buf->mad_agent,
1132 				      struct ib_mad_agent_private, agent);
1133 	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
1134 				   send_buf);
1135 
1136 	free_send_rmpp_list(mad_send_wr);
1137 	kfree(send_buf->mad);
1138 	deref_mad_agent(mad_agent_priv);
1139 }
1140 EXPORT_SYMBOL(ib_free_send_mad);
1141 
1142 int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
1143 {
1144 	struct ib_mad_qp_info *qp_info;
1145 	struct list_head *list;
1146 	struct ib_send_wr *bad_send_wr;
1147 	struct ib_mad_agent *mad_agent;
1148 	struct ib_sge *sge;
1149 	unsigned long flags;
1150 	int ret;
1151 
1152 	/* Set WR ID to find mad_send_wr upon completion */
1153 	qp_info = mad_send_wr->mad_agent_priv->qp_info;
1154 	mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
1155 	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
1156 
1157 	mad_agent = mad_send_wr->send_buf.mad_agent;
1158 	sge = mad_send_wr->sg_list;
1159 	sge[0].addr = ib_dma_map_single(mad_agent->device,
1160 					mad_send_wr->send_buf.mad,
1161 					sge[0].length,
1162 					DMA_TO_DEVICE);
1163 	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
1164 		return -ENOMEM;
1165 
1166 	mad_send_wr->header_mapping = sge[0].addr;
1167 
1168 	sge[1].addr = ib_dma_map_single(mad_agent->device,
1169 					ib_get_payload(mad_send_wr),
1170 					sge[1].length,
1171 					DMA_TO_DEVICE);
1172 	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
1173 		ib_dma_unmap_single(mad_agent->device,
1174 				    mad_send_wr->header_mapping,
1175 				    sge[0].length, DMA_TO_DEVICE);
1176 		return -ENOMEM;
1177 	}
1178 	mad_send_wr->payload_mapping = sge[1].addr;
1179 
1180 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
1181 	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
1182 		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr,
1183 				   &bad_send_wr);
1184 		list = &qp_info->send_queue.list;
1185 	} else {
1186 		ret = 0;
1187 		list = &qp_info->overflow_list;
1188 	}
1189 
1190 	if (!ret) {
1191 		qp_info->send_queue.count++;
1192 		list_add_tail(&mad_send_wr->mad_list.list, list);
1193 	}
1194 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
1195 	if (ret) {
1196 		ib_dma_unmap_single(mad_agent->device,
1197 				    mad_send_wr->header_mapping,
1198 				    sge[0].length, DMA_TO_DEVICE);
1199 		ib_dma_unmap_single(mad_agent->device,
1200 				    mad_send_wr->payload_mapping,
1201 				    sge[1].length, DMA_TO_DEVICE);
1202 	}
1203 	return ret;
1204 }
1205 
1206 /*
1207  * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
1208  *  with the registered client
1209  */
1210 int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
1211 		     struct ib_mad_send_buf **bad_send_buf)
1212 {
1213 	struct ib_mad_agent_private *mad_agent_priv;
1214 	struct ib_mad_send_buf *next_send_buf;
1215 	struct ib_mad_send_wr_private *mad_send_wr;
1216 	unsigned long flags;
1217 	int ret = -EINVAL;
1218 
1219 	/* Walk list of send WRs and post each on send list */
1220 	for (; send_buf; send_buf = next_send_buf) {
1221 
1222 		mad_send_wr = container_of(send_buf,
1223 					   struct ib_mad_send_wr_private,
1224 					   send_buf);
1225 		mad_agent_priv = mad_send_wr->mad_agent_priv;
1226 
1227 		if (!send_buf->mad_agent->send_handler ||
1228 		    (send_buf->timeout_ms &&
1229 		     !send_buf->mad_agent->recv_handler)) {
1230 			ret = -EINVAL;
1231 			goto error;
1232 		}
1233 
1234 		if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) {
1235 			if (mad_agent_priv->agent.rmpp_version) {
1236 				ret = -EINVAL;
1237 				goto error;
1238 			}
1239 		}
1240 
1241 		/*
1242 		 * Save pointer to next work request to post in case the
1243 		 * current one completes, and the user modifies the work
1244 		 * request associated with the completion
1245 		 */
1246 		next_send_buf = send_buf->next;
1247 		mad_send_wr->send_wr.wr.ud.ah = send_buf->ah;
1248 
1249 		if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
1250 		    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
1251 			ret = handle_outgoing_dr_smp(mad_agent_priv,
1252 						     mad_send_wr);
1253 			if (ret < 0)		/* error */
1254 				goto error;
1255 			else if (ret == 1)	/* locally consumed */
1256 				continue;
1257 		}
1258 
1259 		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
1260 		/* Timeout will be updated after send completes */
1261 		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
1262 		mad_send_wr->max_retries = send_buf->retries;
1263 		mad_send_wr->retries_left = send_buf->retries;
1264 		send_buf->retries = 0;
1265 		/* Reference for work request to QP + response */
1266 		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
1267 		mad_send_wr->status = IB_WC_SUCCESS;
1268 
1269 		/* Reference MAD agent until send completes */
1270 		atomic_inc(&mad_agent_priv->refcount);
1271 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
1272 		list_add_tail(&mad_send_wr->agent_list,
1273 			      &mad_agent_priv->send_list);
1274 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1275 
1276 		if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
1277 			ret = ib_send_rmpp_mad(mad_send_wr);
1278 			if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
1279 				ret = ib_send_mad(mad_send_wr);
1280 		} else
1281 			ret = ib_send_mad(mad_send_wr);
1282 		if (ret < 0) {
1283 			/* Fail send request */
1284 			spin_lock_irqsave(&mad_agent_priv->lock, flags);
1285 			list_del(&mad_send_wr->agent_list);
1286 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1287 			atomic_dec(&mad_agent_priv->refcount);
1288 			goto error;
1289 		}
1290 	}
1291 	return 0;
1292 error:
1293 	if (bad_send_buf)
1294 		*bad_send_buf = send_buf;
1295 	return ret;
1296 }
1297 EXPORT_SYMBOL(ib_post_send_mad);
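
/*
 * Illustrative sketch (not part of the original file): several prepared send
 * buffers can be chained through send_buf->next and handed to a single
 * ib_post_send_mad() call.  On failure, *bad_send_buf reports the buffer at
 * which posting stopped; that buffer and the remainder of the chain are
 * still owned by the caller.
 */
static int example_post_chain(struct ib_mad_send_buf *msg1,
			      struct ib_mad_send_buf *msg2)
{
	struct ib_mad_send_buf *bad = NULL;
	int ret;

	msg1->next = msg2;
	msg2->next = NULL;

	ret = ib_post_send_mad(msg1, &bad);
	if (ret)
		pr_debug("posting stopped at %p: %d\n", bad, ret);
	return ret;
}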
1298 
1299 /*
1300  * ib_free_recv_mad - Returns the data buffers used to receive
1301  *  a MAD back to the access layer
1302  */
1303 void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
1304 {
1305 	struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf;
1306 	struct ib_mad_private_header *mad_priv_hdr;
1307 	struct ib_mad_private *priv;
1308 	struct list_head free_list;
1309 
1310 	INIT_LIST_HEAD(&free_list);
1311 	list_splice_init(&mad_recv_wc->rmpp_list, &free_list);
1312 
1313 	list_for_each_entry_safe(mad_recv_buf, temp_recv_buf,
1314 					&free_list, list) {
1315 		mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc,
1316 					   recv_buf);
1317 		mad_priv_hdr = container_of(mad_recv_wc,
1318 					    struct ib_mad_private_header,
1319 					    recv_wc);
1320 		priv = container_of(mad_priv_hdr, struct ib_mad_private,
1321 				    header);
1322 		kfree(priv);
1323 	}
1324 }
1325 EXPORT_SYMBOL(ib_free_recv_mad);
1326 
1327 struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
1328 					u8 rmpp_version,
1329 					ib_mad_send_handler send_handler,
1330 					ib_mad_recv_handler recv_handler,
1331 					void *context)
1332 {
1333 	return ERR_PTR(-EINVAL);	/* XXX: for now */
1334 }
1335 EXPORT_SYMBOL(ib_redirect_mad_qp);
1336 
1337 int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
1338 		      struct ib_wc *wc)
1339 {
1340 	dev_err(&mad_agent->device->dev,
1341 		"ib_process_mad_wc() not implemented yet\n");
1342 	return 0;
1343 }
1344 EXPORT_SYMBOL(ib_process_mad_wc);
1345 
1346 static int method_in_use(struct ib_mad_mgmt_method_table **method,
1347 			 struct ib_mad_reg_req *mad_reg_req)
1348 {
1349 	int i;
1350 
1351 	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
1352 		if ((*method)->agent[i]) {
1353 			pr_err("Method %d already in use\n", i);
1354 			return -EINVAL;
1355 		}
1356 	}
1357 	return 0;
1358 }
1359 
1360 static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
1361 {
1362 	/* Allocate management method table */
1363 	*method = kzalloc(sizeof **method, GFP_ATOMIC);
1364 	if (!*method) {
1365 		pr_err("No memory for ib_mad_mgmt_method_table\n");
1366 		return -ENOMEM;
1367 	}
1368 
1369 	return 0;
1370 }
1371 
1372 /*
1373  * Check to see if there are any methods still in use
1374  */
1375 static int check_method_table(struct ib_mad_mgmt_method_table *method)
1376 {
1377 	int i;
1378 
1379 	for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
1380 		if (method->agent[i])
1381 			return 1;
1382 	return 0;
1383 }
1384 
1385 /*
1386  * Check to see if there are any method tables for this class still in use
1387  */
1388 static int check_class_table(struct ib_mad_mgmt_class_table *class)
1389 {
1390 	int i;
1391 
1392 	for (i = 0; i < MAX_MGMT_CLASS; i++)
1393 		if (class->method_table[i])
1394 			return 1;
1395 	return 0;
1396 }
1397 
1398 static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class)
1399 {
1400 	int i;
1401 
1402 	for (i = 0; i < MAX_MGMT_OUI; i++)
1403 		if (vendor_class->method_table[i])
1404 			return 1;
1405 	return 0;
1406 }
1407 
1408 static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
1409 			   const char *oui)
1410 {
1411 	int i;
1412 
1413 	for (i = 0; i < MAX_MGMT_OUI; i++)
1414 		/* Is there a matching OUI for this vendor class? */
1415 		if (!memcmp(vendor_class->oui[i], oui, 3))
1416 			return i;
1417 
1418 	return -1;
1419 }
1420 
1421 static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor)
1422 {
1423 	int i;
1424 
1425 	for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++)
1426 		if (vendor->vendor_class[i])
1427 			return 1;
1428 
1429 	return 0;
1430 }
1431 
1432 static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
1433 				     struct ib_mad_agent_private *agent)
1434 {
1435 	int i;
1436 
1437 	/* Remove any methods for this mad agent */
1438 	for (i = 0; i < IB_MGMT_MAX_METHODS; i++) {
1439 		if (method->agent[i] == agent) {
1440 			method->agent[i] = NULL;
1441 		}
1442 	}
1443 }
1444 
1445 static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1446 			      struct ib_mad_agent_private *agent_priv,
1447 			      u8 mgmt_class)
1448 {
1449 	struct ib_mad_port_private *port_priv;
1450 	struct ib_mad_mgmt_class_table **class;
1451 	struct ib_mad_mgmt_method_table **method;
1452 	int i, ret;
1453 
1454 	port_priv = agent_priv->qp_info->port_priv;
1455 	class = &port_priv->version[mad_reg_req->mgmt_class_version].class;
1456 	if (!*class) {
1457 		/* Allocate management class table for "new" class version */
1458 		*class = kzalloc(sizeof **class, GFP_ATOMIC);
1459 		if (!*class) {
1460 			dev_err(&agent_priv->agent.device->dev,
1461 				"No memory for ib_mad_mgmt_class_table\n");
1462 			ret = -ENOMEM;
1463 			goto error1;
1464 		}
1465 
1466 		/* Allocate method table for this management class */
1467 		method = &(*class)->method_table[mgmt_class];
1468 		if ((ret = allocate_method_table(method)))
1469 			goto error2;
1470 	} else {
1471 		method = &(*class)->method_table[mgmt_class];
1472 		if (!*method) {
1473 			/* Allocate method table for this management class */
1474 			if ((ret = allocate_method_table(method)))
1475 				goto error1;
1476 		}
1477 	}
1478 
1479 	/* Now, make sure methods are not already in use */
1480 	if (method_in_use(method, mad_reg_req))
1481 		goto error3;
1482 
1483 	/* Finally, add in methods being registered */
1484 	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
1485 		(*method)->agent[i] = agent_priv;
1486 
1487 	return 0;
1488 
1489 error3:
1490 	/* Remove any methods for this mad agent */
1491 	remove_methods_mad_agent(*method, agent_priv);
1492 	/* Now, check to see if there are any methods in use */
1493 	if (!check_method_table(*method)) {
1494 		/* If not, release management method table */
1495 		kfree(*method);
1496 		*method = NULL;
1497 	}
1498 	ret = -EINVAL;
1499 	goto error1;
1500 error2:
1501 	kfree(*class);
1502 	*class = NULL;
1503 error1:
1504 	return ret;
1505 }
1506 
1507 static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1508 			   struct ib_mad_agent_private *agent_priv)
1509 {
1510 	struct ib_mad_port_private *port_priv;
1511 	struct ib_mad_mgmt_vendor_class_table **vendor_table;
1512 	struct ib_mad_mgmt_vendor_class_table *vendor = NULL;
1513 	struct ib_mad_mgmt_vendor_class *vendor_class = NULL;
1514 	struct ib_mad_mgmt_method_table **method;
1515 	int i, ret = -ENOMEM;
1516 	u8 vclass;
1517 
1518 	/* "New" vendor (with OUI) class */
1519 	vclass = vendor_class_index(mad_reg_req->mgmt_class);
1520 	port_priv = agent_priv->qp_info->port_priv;
1521 	vendor_table = &port_priv->version[
1522 				mad_reg_req->mgmt_class_version].vendor;
1523 	if (!*vendor_table) {
1524 		/* Allocate mgmt vendor class table for "new" class version */
1525 		vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
1526 		if (!vendor) {
1527 			dev_err(&agent_priv->agent.device->dev,
1528 				"No memory for ib_mad_mgmt_vendor_class_table\n");
1529 			goto error1;
1530 		}
1531 
1532 		*vendor_table = vendor;
1533 	}
1534 	if (!(*vendor_table)->vendor_class[vclass]) {
1535 		/* Allocate table for this management vendor class */
1536 		vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
1537 		if (!vendor_class) {
1538 			dev_err(&agent_priv->agent.device->dev,
1539 				"No memory for ib_mad_mgmt_vendor_class\n");
1540 			goto error2;
1541 		}
1542 
1543 		(*vendor_table)->vendor_class[vclass] = vendor_class;
1544 	}
1545 	for (i = 0; i < MAX_MGMT_OUI; i++) {
1546 		/* Is there a matching OUI for this vendor class? */
1547 		if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i],
1548 			    mad_reg_req->oui, 3)) {
1549 			method = &(*vendor_table)->vendor_class[
1550 						vclass]->method_table[i];
1551 			BUG_ON(!*method);
1552 			goto check_in_use;
1553 		}
1554 	}
1555 	for (i = 0; i < MAX_MGMT_OUI; i++) {
1556 		/* Is an OUI slot available? */
1557 		if (!is_vendor_oui((*vendor_table)->vendor_class[
1558 				vclass]->oui[i])) {
1559 			method = &(*vendor_table)->vendor_class[
1560 				vclass]->method_table[i];
1561 			BUG_ON(*method);
1562 			/* Allocate method table for this OUI */
1563 			if ((ret = allocate_method_table(method)))
1564 				goto error3;
1565 			memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
1566 			       mad_reg_req->oui, 3);
1567 			goto check_in_use;
1568 		}
1569 	}
1570 	dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n");
1571 	goto error3;
1572 
1573 check_in_use:
1574 	/* Now, make sure methods are not already in use */
1575 	if (method_in_use(method, mad_reg_req))
1576 		goto error4;
1577 
1578 	/* Finally, add in methods being registered */
1579 	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
1580 		(*method)->agent[i] = agent_priv;
1581 
1582 	return 0;
1583 
1584 error4:
1585 	/* Remove any methods for this mad agent */
1586 	remove_methods_mad_agent(*method, agent_priv);
1587 	/* Now, check to see if there are any methods in use */
1588 	if (!check_method_table(*method)) {
1589 		/* If not, release management method table */
1590 		kfree(*method);
1591 		*method = NULL;
1592 	}
1593 	ret = -EINVAL;
1594 error3:
1595 	if (vendor_class) {
1596 		(*vendor_table)->vendor_class[vclass] = NULL;
1597 		kfree(vendor_class);
1598 	}
1599 error2:
1600 	if (vendor) {
1601 		*vendor_table = NULL;
1602 		kfree(vendor);
1603 	}
1604 error1:
1605 	return ret;
1606 }
1607 
1608 static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
1609 {
1610 	struct ib_mad_port_private *port_priv;
1611 	struct ib_mad_mgmt_class_table *class;
1612 	struct ib_mad_mgmt_method_table *method;
1613 	struct ib_mad_mgmt_vendor_class_table *vendor;
1614 	struct ib_mad_mgmt_vendor_class *vendor_class;
1615 	int index;
1616 	u8 mgmt_class;
1617 
1618 	/*
1619 	 * Was a MAD registration request supplied
1620 	 * with the original registration?
1621 	 */
1622 	if (!agent_priv->reg_req)
1623 		goto out;
1625 
1626 	port_priv = agent_priv->qp_info->port_priv;
1627 	mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
1628 	class = port_priv->version[
1629 			agent_priv->reg_req->mgmt_class_version].class;
1630 	if (!class)
1631 		goto vendor_check;
1632 
1633 	method = class->method_table[mgmt_class];
1634 	if (method) {
1635 		/* Remove any methods for this mad agent */
1636 		remove_methods_mad_agent(method, agent_priv);
1637 		/* Now, check to see if there are any methods still in use */
1638 		if (!check_method_table(method)) {
1639 			/* If not, release management method table */
1640 			kfree(method);
1641 			class->method_table[mgmt_class] = NULL;
1642 			/* Any management classes left? */
1643 			if (!check_class_table(class)) {
1644 				/* If not, release management class table */
1645 				kfree(class);
1646 				port_priv->version[
1647 					agent_priv->reg_req->
1648 					mgmt_class_version].class = NULL;
1649 			}
1650 		}
1651 	}
1652 
1653 vendor_check:
1654 	if (!is_vendor_class(mgmt_class))
1655 		goto out;
1656 
1657 	/* normalize mgmt_class to vendor range 2 */
1658 	mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class);
1659 	vendor = port_priv->version[
1660 			agent_priv->reg_req->mgmt_class_version].vendor;
1661 
1662 	if (!vendor)
1663 		goto out;
1664 
1665 	vendor_class = vendor->vendor_class[mgmt_class];
1666 	if (vendor_class) {
1667 		index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui);
1668 		if (index < 0)
1669 			goto out;
1670 		method = vendor_class->method_table[index];
1671 		if (method) {
1672 			/* Remove any methods for this mad agent */
1673 			remove_methods_mad_agent(method, agent_priv);
1674 			/*
1675 			 * Now, check to see if there are
1676 			 * any methods still in use
1677 			 */
1678 			if (!check_method_table(method)) {
1679 				/* If not, release management method table */
1680 				kfree(method);
1681 				vendor_class->method_table[index] = NULL;
1682 				memset(vendor_class->oui[index], 0, 3);
1683 				/* Any OUIs left? */
1684 				if (!check_vendor_class(vendor_class)) {
1685 					/* If not, release vendor class table */
1686 					kfree(vendor_class);
1687 					vendor->vendor_class[mgmt_class] = NULL;
1688 					/* Any other vendor classes left? */
1689 					if (!check_vendor_table(vendor)) {
1690 						kfree(vendor);
1691 						port_priv->version[
1692 							agent_priv->reg_req->
1693 							mgmt_class_version].
1694 							vendor = NULL;
1695 					}
1696 				}
1697 			}
1698 		}
1699 	}
1700 
1701 out:
1702 	return;
1703 }
1704 
1705 static struct ib_mad_agent_private *
1706 find_mad_agent(struct ib_mad_port_private *port_priv,
1707 	       const struct ib_mad_hdr *mad_hdr)
1708 {
1709 	struct ib_mad_agent_private *mad_agent = NULL;
1710 	unsigned long flags;
1711 
1712 	spin_lock_irqsave(&port_priv->reg_lock, flags);
1713 	if (ib_response_mad(mad_hdr)) {
1714 		u32 hi_tid;
1715 		struct ib_mad_agent_private *entry;
1716 
1717 		/*
1718 		 * Routing is based on high 32 bits of transaction ID
1719 		 * of MAD.
1720 		 */
1721 		hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
1722 		list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
1723 			if (entry->agent.hi_tid == hi_tid) {
1724 				mad_agent = entry;
1725 				break;
1726 			}
1727 		}
1728 	} else {
1729 		struct ib_mad_mgmt_class_table *class;
1730 		struct ib_mad_mgmt_method_table *method;
1731 		struct ib_mad_mgmt_vendor_class_table *vendor;
1732 		struct ib_mad_mgmt_vendor_class *vendor_class;
1733 		const struct ib_vendor_mad *vendor_mad;
1734 		int index;
1735 
1736 		/*
1737 		 * Routing is based on version, class, and method.
1738 		 * For "newer" vendor MADs, routing is also based on the OUI.
1739 		 */
1740 		if (mad_hdr->class_version >= MAX_MGMT_VERSION)
1741 			goto out;
1742 		if (!is_vendor_class(mad_hdr->mgmt_class)) {
1743 			class = port_priv->version[
1744 					mad_hdr->class_version].class;
1745 			if (!class)
1746 				goto out;
1747 			if (convert_mgmt_class(mad_hdr->mgmt_class) >=
1748 			    IB_MGMT_MAX_METHODS)
1749 				goto out;
1750 			method = class->method_table[convert_mgmt_class(
1751 							mad_hdr->mgmt_class)];
1752 			if (method)
1753 				mad_agent = method->agent[mad_hdr->method &
1754 							  ~IB_MGMT_METHOD_RESP];
1755 		} else {
1756 			vendor = port_priv->version[
1757 					mad_hdr->class_version].vendor;
1758 			if (!vendor)
1759 				goto out;
1760 			vendor_class = vendor->vendor_class[vendor_class_index(
1761 						mad_hdr->mgmt_class)];
1762 			if (!vendor_class)
1763 				goto out;
1764 			/* Find matching OUI */
1765 			vendor_mad = (const struct ib_vendor_mad *)mad_hdr;
1766 			index = find_vendor_oui(vendor_class, vendor_mad->oui);
1767 			if (index == -1)
1768 				goto out;
1769 			method = vendor_class->method_table[index];
1770 			if (method) {
1771 				mad_agent = method->agent[mad_hdr->method &
1772 							  ~IB_MGMT_METHOD_RESP];
1773 			}
1774 		}
1775 	}
1776 
1777 	if (mad_agent) {
1778 		if (mad_agent->agent.recv_handler)
1779 			atomic_inc(&mad_agent->refcount);
1780 		else {
1781 			dev_notice(&port_priv->device->dev,
1782 				   "No receive handler for client %p on port %d\n",
1783 				   &mad_agent->agent, port_priv->port_num);
1784 			mad_agent = NULL;
1785 		}
1786 	}
1787 out:
1788 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
1789 
1790 	return mad_agent;
1791 }
1792 
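/*
 * Sanity-check a received MAD: reject unknown base versions (the OPA base
 * version is accepted only when the port supports OPA MADs) and enforce
 * that SMI classes arrive on QP0 while all other classes arrive on QP1.
 */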
1793 static int validate_mad(const struct ib_mad_hdr *mad_hdr,
1794 			const struct ib_mad_qp_info *qp_info,
1795 			bool opa)
1796 {
1797 	int valid = 0;
1798 	u32 qp_num = qp_info->qp->qp_num;
1799 
1800 	/* Make sure MAD base version is understood */
1801 	if (mad_hdr->base_version != IB_MGMT_BASE_VERSION &&
1802 	    (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) {
1803 		pr_err("MAD received with unsupported base version %d %s\n",
1804 		       mad_hdr->base_version, opa ? "(opa)" : "");
1805 		goto out;
1806 	}
1807 
1808 	/* Filter SMI packets sent to other than QP0 */
1809 	if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
1810 	    (mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
1811 		if (qp_num == 0)
1812 			valid = 1;
1813 	} else {
1814 		/* Filter GSI packets sent to QP0 */
1815 		if (qp_num != 0)
1816 			valid = 1;
1817 	}
1818 
1819 out:
1820 	return valid;
1821 }
1822 
1823 static int is_rmpp_data_mad(const struct ib_mad_agent_private *mad_agent_priv,
1824 			    const struct ib_mad_hdr *mad_hdr)
1825 {
1826 	struct ib_rmpp_mad *rmpp_mad;
1827 
1828 	rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
1829 	return !mad_agent_priv->agent.rmpp_version ||
1830 		!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) ||
1831 		!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
1832 				    IB_MGMT_RMPP_FLAG_ACTIVE) ||
1833 		(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
1834 }
1835 
1836 static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr,
1837 				     const struct ib_mad_recv_wc *rwc)
1838 {
1839 	return ((struct ib_mad_hdr *)(wr->send_buf.mad))->mgmt_class ==
1840 		rwc->recv_buf.mad->mad_hdr.mgmt_class;
1841 }
1842 
1843 static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv,
1844 				   const struct ib_mad_send_wr_private *wr,
1845 				   const struct ib_mad_recv_wc *rwc)
1846 {
1847 	struct ib_ah_attr attr;
1848 	u8 send_resp, rcv_resp;
1849 	union ib_gid sgid;
1850 	struct ib_device *device = mad_agent_priv->agent.device;
1851 	u8 port_num = mad_agent_priv->agent.port_num;
1852 	u8 lmc;
1853 
1854 	send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad);
1855 	rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr);
1856 
1857 	if (send_resp == rcv_resp)
1858 		/* both are requests or both are responses; treat GIDs as different */
1859 		return 0;
1860 
1861 	if (ib_query_ah(wr->send_buf.ah, &attr))
1862 		/* Assume not equal, to avoid false positives. */
1863 		return 0;
1864 
1865 	if (!!(attr.ah_flags & IB_AH_GRH) !=
1866 	    !!(rwc->wc->wc_flags & IB_WC_GRH))
1867 		/* one has GID, other does not.  Assume different */
1868 		return 0;
1869 
1870 	if (!send_resp && rcv_resp) {
1871 		/* we sent a request and this is the matching response */
1872 		if (!(attr.ah_flags & IB_AH_GRH)) {
1873 			if (ib_get_cached_lmc(device, port_num, &lmc))
1874 				return 0;
1875 			return (!lmc || !((attr.src_path_bits ^
1876 					   rwc->wc->dlid_path_bits) &
1877 					  ((1 << lmc) - 1)));
1878 		} else {
1879 			if (ib_get_cached_gid(device, port_num,
1880 					      attr.grh.sgid_index, &sgid))
1881 				return 0;
1882 			return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
1883 				       16);
1884 		}
1885 	}
1886 
1887 	if (!(attr.ah_flags & IB_AH_GRH))
1888 		return attr.dlid == rwc->wc->slid;
1889 	else
1890 		return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw,
1891 			       16);
1892 }
1893 
1894 static inline int is_direct(u8 class)
1895 {
1896 	return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE);
1897 }
1898 
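/*
 * Match a received response to an outstanding send.  A send on the wait
 * list (or, if the response raced the send completion, still on the send
 * list) matches when the TID and management class agree and, for
 * non-directed-route MADs, the GIDs/LIDs agree as well.  Returns NULL if
 * the matching request has already been canceled.
 */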
1899 struct ib_mad_send_wr_private*
1900 ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv,
1901 		 const struct ib_mad_recv_wc *wc)
1902 {
1903 	struct ib_mad_send_wr_private *wr;
1904 	const struct ib_mad_hdr *mad_hdr;
1905 
1906 	mad_hdr = &wc->recv_buf.mad->mad_hdr;
1907 
1908 	list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) {
1909 		if ((wr->tid == mad_hdr->tid) &&
1910 		    rcv_has_same_class(wr, wc) &&
1911 		    /*
1912 		     * Don't check GID for direct routed MADs.
1913 		     * These might have permissive LIDs.
1914 		     */
1915 		    (is_direct(mad_hdr->mgmt_class) ||
1916 		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
1917 			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1918 	}
1919 
1920 	/*
1921 	 * It's possible to receive the response before we've
1922 	 * been notified that the send has completed
1923 	 */
1924 	list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) {
1925 		if (is_rmpp_data_mad(mad_agent_priv, wr->send_buf.mad) &&
1926 		    wr->tid == mad_hdr->tid &&
1927 		    wr->timeout &&
1928 		    rcv_has_same_class(wr, wc) &&
1929 		    /*
1930 		     * Don't check GID for direct routed MADs.
1931 		     * These might have permissive LIDs.
1932 		     */
1933 		    (is_direct(mad_hdr->mgmt_class) ||
1934 		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
1935 			/* Verify request has not been canceled */
1936 			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1937 	}
1938 	return NULL;
1939 }
1940 
1941 void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
1942 {
1943 	mad_send_wr->timeout = 0;
1944 	if (mad_send_wr->refcount == 1)
1945 		list_move_tail(&mad_send_wr->agent_list,
1946 			      &mad_send_wr->mad_agent_priv->done_list);
1947 }
1948 
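/*
 * Deliver a fully received MAD to its agent.  Kernel RMPP agents have the
 * MAD run through RMPP reassembly first; responses are then paired with
 * the matching send (completing the send after the receive, per the
 * defined ordering), and unsolicited MADs go straight to the recv_handler.
 */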
1949 static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1950 				 struct ib_mad_recv_wc *mad_recv_wc)
1951 {
1952 	struct ib_mad_send_wr_private *mad_send_wr;
1953 	struct ib_mad_send_wc mad_send_wc;
1954 	unsigned long flags;
1955 
1956 	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
1957 	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
1958 	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
1959 		mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
1960 						      mad_recv_wc);
1961 		if (!mad_recv_wc) {
1962 			deref_mad_agent(mad_agent_priv);
1963 			return;
1964 		}
1965 	}
1966 
1967 	/* Complete corresponding request */
1968 	if (ib_response_mad(&mad_recv_wc->recv_buf.mad->mad_hdr)) {
1969 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
1970 		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
1971 		if (!mad_send_wr) {
1972 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1973 			if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) &&
1974 			    ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class) &&
1975 			    (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr) &
1976 			     IB_MGMT_RMPP_FLAG_ACTIVE)) {
1977 				/* user rmpp is in effect
1978 				 * and this is an active RMPP MAD
1979 				 */
1980 				mad_recv_wc->wc->wr_id = 0;
1981 				mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1982 								   mad_recv_wc);
1983 				atomic_dec(&mad_agent_priv->refcount);
1984 			} else {
1985 				/* not user rmpp, revert to normal behavior and
1986 				 * drop the mad */
1987 				ib_free_recv_mad(mad_recv_wc);
1988 				deref_mad_agent(mad_agent_priv);
1989 				return;
1990 			}
1991 		} else {
1992 			ib_mark_mad_done(mad_send_wr);
1993 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1994 
1995 			/* Defined behavior is to complete response before request */
1996 			mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
1997 			mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1998 							   mad_recv_wc);
1999 			atomic_dec(&mad_agent_priv->refcount);
2000 
2001 			mad_send_wc.status = IB_WC_SUCCESS;
2002 			mad_send_wc.vendor_err = 0;
2003 			mad_send_wc.send_buf = &mad_send_wr->send_buf;
2004 			ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
2005 		}
2006 	} else {
2007 		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
2008 						   mad_recv_wc);
2009 		deref_mad_agent(mad_agent_priv);
2010 	}
2011 }
2012 
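/*
 * Process an incoming directed-route SMP: it may be discarded, handled
 * locally on this port, or (on a switch) forwarded out the port indicated
 * by the SMP via agent_send_response(), after which the local copy is
 * discarded.
 */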
2013 static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv,
2014 				     const struct ib_mad_qp_info *qp_info,
2015 				     const struct ib_wc *wc,
2016 				     int port_num,
2017 				     struct ib_mad_private *recv,
2018 				     struct ib_mad_private *response)
2019 {
2020 	enum smi_forward_action retsmi;
2021 	struct ib_smp *smp = (struct ib_smp *)recv->mad;
2022 
2023 	if (smi_handle_dr_smp_recv(smp,
2024 				   rdma_cap_ib_switch(port_priv->device),
2025 				   port_num,
2026 				   port_priv->device->phys_port_cnt) ==
2027 				   IB_SMI_DISCARD)
2028 		return IB_SMI_DISCARD;
2029 
2030 	retsmi = smi_check_forward_dr_smp(smp);
2031 	if (retsmi == IB_SMI_LOCAL)
2032 		return IB_SMI_HANDLE;
2033 
2034 	if (retsmi == IB_SMI_SEND) { /* don't forward */
2035 		if (smi_handle_dr_smp_send(smp,
2036 					   rdma_cap_ib_switch(port_priv->device),
2037 					   port_num) == IB_SMI_DISCARD)
2038 			return IB_SMI_DISCARD;
2039 
2040 		if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD)
2041 			return IB_SMI_DISCARD;
2042 	} else if (rdma_cap_ib_switch(port_priv->device)) {
2043 		/* forward case for switches */
2044 		memcpy(response, recv, mad_priv_size(response));
2045 		response->header.recv_wc.wc = &response->header.wc;
2046 		response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad;
2047 		response->header.recv_wc.recv_buf.grh = &response->grh;
2048 
2049 		agent_send_response((const struct ib_mad_hdr *)response->mad,
2050 				    &response->grh, wc,
2051 				    port_priv->device,
2052 				    smi_get_fwd_port(smp),
2053 				    qp_info->qp->qp_num,
2054 				    response->mad_size,
2055 				    false);
2056 
2057 		return IB_SMI_DISCARD;
2058 	}
2059 	return IB_SMI_HANDLE;
2060 }
2061 
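/*
 * Build a reply for a Get/Set request that no agent claimed: the request is
 * echoed back with method GetResp and a "method/attribute not supported"
 * status (with the direction bit set for directed-route SMPs).  Returns
 * false for all other methods, which the caller silently drops.
 */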
2062 static bool generate_unmatched_resp(const struct ib_mad_private *recv,
2063 				    struct ib_mad_private *response,
2064 				    size_t *resp_len, bool opa)
2065 {
2066 	const struct ib_mad_hdr *recv_hdr = (const struct ib_mad_hdr *)recv->mad;
2067 	struct ib_mad_hdr *resp_hdr = (struct ib_mad_hdr *)response->mad;
2068 
2069 	if (recv_hdr->method == IB_MGMT_METHOD_GET ||
2070 	    recv_hdr->method == IB_MGMT_METHOD_SET) {
2071 		memcpy(response, recv, mad_priv_size(response));
2072 		response->header.recv_wc.wc = &response->header.wc;
2073 		response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad;
2074 		response->header.recv_wc.recv_buf.grh = &response->grh;
2075 		resp_hdr->method = IB_MGMT_METHOD_GET_RESP;
2076 		resp_hdr->status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);
2077 		if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
2078 			resp_hdr->status |= IB_SMP_DIRECTION;
2079 
2080 		if (opa && recv_hdr->base_version == OPA_MGMT_BASE_VERSION) {
2081 			if (recv_hdr->mgmt_class ==
2082 			    IB_MGMT_CLASS_SUBN_LID_ROUTED ||
2083 			    recv_hdr->mgmt_class ==
2084 			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
2085 				*resp_len = opa_get_smp_header_size(
2086 							(struct opa_smp *)recv->mad);
2087 			else
2088 				*resp_len = sizeof(struct ib_mad_hdr);
2089 		}
2090 
2091 		return true;
2092 	} else {
2093 		return false;
2094 	}
2095 }
2096 
2097 static enum smi_action
2098 handle_opa_smi(struct ib_mad_port_private *port_priv,
2099 	       struct ib_mad_qp_info *qp_info,
2100 	       struct ib_wc *wc,
2101 	       int port_num,
2102 	       struct ib_mad_private *recv,
2103 	       struct ib_mad_private *response)
2104 {
2105 	enum smi_forward_action retsmi;
2106 	struct opa_smp *smp = (struct opa_smp *)recv->mad;
2107 
2108 	if (opa_smi_handle_dr_smp_recv(smp,
2109 				   rdma_cap_ib_switch(port_priv->device),
2110 				   port_num,
2111 				   port_priv->device->phys_port_cnt) ==
2112 				   IB_SMI_DISCARD)
2113 		return IB_SMI_DISCARD;
2114 
2115 	retsmi = opa_smi_check_forward_dr_smp(smp);
2116 	if (retsmi == IB_SMI_LOCAL)
2117 		return IB_SMI_HANDLE;
2118 
2119 	if (retsmi == IB_SMI_SEND) { /* don't forward */
2120 		if (opa_smi_handle_dr_smp_send(smp,
2121 					   rdma_cap_ib_switch(port_priv->device),
2122 					   port_num) == IB_SMI_DISCARD)
2123 			return IB_SMI_DISCARD;
2124 
2125 		if (opa_smi_check_local_smp(smp, port_priv->device) ==
2126 		    IB_SMI_DISCARD)
2127 			return IB_SMI_DISCARD;
2128 
2129 	} else if (rdma_cap_ib_switch(port_priv->device)) {
2130 		/* forward case for switches */
2131 		memcpy(response, recv, mad_priv_size(response));
2132 		response->header.recv_wc.wc = &response->header.wc;
2133 		response->header.recv_wc.recv_buf.opa_mad =
2134 				(struct opa_mad *)response->mad;
2135 		response->header.recv_wc.recv_buf.grh = &response->grh;
2136 
2137 		agent_send_response((const struct ib_mad_hdr *)response->mad,
2138 				    &response->grh, wc,
2139 				    port_priv->device,
2140 				    opa_smi_get_fwd_port(smp),
2141 				    qp_info->qp->qp_num,
2142 				    recv->header.wc.byte_len,
2143 				    true);
2144 
2145 		return IB_SMI_DISCARD;
2146 	}
2147 
2148 	return IB_SMI_HANDLE;
2149 }
2150 
2151 static enum smi_action
2152 handle_smi(struct ib_mad_port_private *port_priv,
2153 	   struct ib_mad_qp_info *qp_info,
2154 	   struct ib_wc *wc,
2155 	   int port_num,
2156 	   struct ib_mad_private *recv,
2157 	   struct ib_mad_private *response,
2158 	   bool opa)
2159 {
2160 	struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad;
2161 
2162 	if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION &&
2163 	    mad_hdr->class_version == OPA_SMI_CLASS_VERSION)
2164 		return handle_opa_smi(port_priv, qp_info, wc, port_num, recv,
2165 				      response);
2166 
2167 	return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
2168 }
2169 
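/*
 * Receive completion path: unmap the buffer, validate the MAD, run
 * directed-route SMPs through the SMI code, then offer the MAD to the
 * device driver's process_mad() before dispatching it to a registered
 * agent.  If nobody consumes it and it was a Get/Set, an "unsupported"
 * response is generated.  A receive buffer is always reposted at the end.
 */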
2170 static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
2171 				     struct ib_wc *wc)
2172 {
2173 	struct ib_mad_qp_info *qp_info;
2174 	struct ib_mad_private_header *mad_priv_hdr;
2175 	struct ib_mad_private *recv, *response = NULL;
2176 	struct ib_mad_list_head *mad_list;
2177 	struct ib_mad_agent_private *mad_agent;
2178 	int port_num;
2179 	int ret = IB_MAD_RESULT_SUCCESS;
2180 	size_t mad_size;
2181 	u16 resp_mad_pkey_index = 0;
2182 	bool opa;
2183 
2184 	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
2185 	qp_info = mad_list->mad_queue->qp_info;
2186 	dequeue_mad(mad_list);
2187 
2188 	opa = rdma_cap_opa_mad(qp_info->port_priv->device,
2189 			       qp_info->port_priv->port_num);
2190 
2191 	mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
2192 				    mad_list);
2193 	recv = container_of(mad_priv_hdr, struct ib_mad_private, header);
2194 	ib_dma_unmap_single(port_priv->device,
2195 			    recv->header.mapping,
2196 			    mad_priv_dma_size(recv),
2197 			    DMA_FROM_DEVICE);
2198 
2199 	/* Setup MAD receive work completion from "normal" work completion */
2200 	recv->header.wc = *wc;
2201 	recv->header.recv_wc.wc = &recv->header.wc;
2202 
2203 	if (opa && ((struct ib_mad_hdr *)(recv->mad))->base_version == OPA_MGMT_BASE_VERSION) {
2204 		recv->header.recv_wc.mad_len = wc->byte_len - sizeof(struct ib_grh);
2205 		recv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad);
2206 	} else {
2207 		recv->header.recv_wc.mad_len = sizeof(struct ib_mad);
2208 		recv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad);
2209 	}
2210 
2211 	recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad;
2212 	recv->header.recv_wc.recv_buf.grh = &recv->grh;
2213 
2214 	if (atomic_read(&qp_info->snoop_count))
2215 		snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS);
2216 
2217 	/* Validate MAD */
2218 	if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa))
2219 		goto out;
2220 
2221 	mad_size = recv->mad_size;
2222 	response = alloc_mad_private(mad_size, GFP_KERNEL);
2223 	if (!response) {
2224 		dev_err(&port_priv->device->dev,
2225 			"ib_mad_recv_done_handler no memory for response buffer\n");
2226 		goto out;
2227 	}
2228 
2229 	if (rdma_cap_ib_switch(port_priv->device))
2230 		port_num = wc->port_num;
2231 	else
2232 		port_num = port_priv->port_num;
2233 
2234 	if (((struct ib_mad_hdr *)recv->mad)->mgmt_class ==
2235 	    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
2236 		if (handle_smi(port_priv, qp_info, wc, port_num, recv,
2237 			       response, opa)
2238 		    == IB_SMI_DISCARD)
2239 			goto out;
2240 	}
2241 
2242 	/* Give driver "right of first refusal" on incoming MAD */
2243 	if (port_priv->device->process_mad) {
2244 		ret = port_priv->device->process_mad(port_priv->device, 0,
2245 						     port_priv->port_num,
2246 						     wc, &recv->grh,
2247 						     (const struct ib_mad_hdr *)recv->mad,
2248 						     recv->mad_size,
2249 						     (struct ib_mad_hdr *)response->mad,
2250 						     &mad_size, &resp_mad_pkey_index);
2251 
2252 		if (opa)
2253 			wc->pkey_index = resp_mad_pkey_index;
2254 
2255 		if (ret & IB_MAD_RESULT_SUCCESS) {
2256 			if (ret & IB_MAD_RESULT_CONSUMED)
2257 				goto out;
2258 			if (ret & IB_MAD_RESULT_REPLY) {
2259 				agent_send_response((const struct ib_mad_hdr *)response->mad,
2260 						    &recv->grh, wc,
2261 						    port_priv->device,
2262 						    port_num,
2263 						    qp_info->qp->qp_num,
2264 						    mad_size, opa);
2265 				goto out;
2266 			}
2267 		}
2268 	}
2269 
2270 	mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad);
2271 	if (mad_agent) {
2272 		ib_mad_complete_recv(mad_agent, &recv->header.recv_wc);
2273 		/*
2274 		 * recv is freed by ib_mad_complete_recv(): either directly
2275 		 * on an error path or via the agent's recv_handler
2276 		 */
2277 		recv = NULL;
2278 	} else if ((ret & IB_MAD_RESULT_SUCCESS) &&
2279 		   generate_unmatched_resp(recv, response, &mad_size, opa)) {
2280 		agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc,
2281 				    port_priv->device, port_num,
2282 				    qp_info->qp->qp_num, mad_size, opa);
2283 	}
2284 
2285 out:
2286 	/* Post another receive request for this QP */
2287 	if (response) {
2288 		ib_mad_post_receive_mads(qp_info, response);
2289 		kfree(recv);
2290 	} else
2291 		ib_mad_post_receive_mads(qp_info, recv);
2292 }
2293 
2294 static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
2295 {
2296 	struct ib_mad_send_wr_private *mad_send_wr;
2297 	unsigned long delay;
2298 
2299 	if (list_empty(&mad_agent_priv->wait_list)) {
2300 		cancel_delayed_work(&mad_agent_priv->timed_work);
2301 	} else {
2302 		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2303 					 struct ib_mad_send_wr_private,
2304 					 agent_list);
2305 
2306 		if (time_after(mad_agent_priv->timeout,
2307 			       mad_send_wr->timeout)) {
2308 			mad_agent_priv->timeout = mad_send_wr->timeout;
2309 			delay = mad_send_wr->timeout - jiffies;
2310 			if ((long)delay <= 0)
2311 				delay = 1;
2312 			mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
2313 					 &mad_agent_priv->timed_work, delay);
2314 		}
2315 	}
2316 }
2317 
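/*
 * Queue a sent MAD on the wait list until its response arrives or it times
 * out.  The wait list is kept ordered by absolute timeout (earliest at the
 * head), and the agent's single delayed work item is rescheduled whenever
 * the new entry becomes the earliest deadline.
 */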
2318 static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
2319 {
2320 	struct ib_mad_agent_private *mad_agent_priv;
2321 	struct ib_mad_send_wr_private *temp_mad_send_wr;
2322 	struct list_head *list_item;
2323 	unsigned long delay;
2324 
2325 	mad_agent_priv = mad_send_wr->mad_agent_priv;
2326 	list_del(&mad_send_wr->agent_list);
2327 
2328 	delay = mad_send_wr->timeout;
2329 	mad_send_wr->timeout += jiffies;
2330 
2331 	if (delay) {
2332 		list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
2333 			temp_mad_send_wr = list_entry(list_item,
2334 						struct ib_mad_send_wr_private,
2335 						agent_list);
2336 			if (time_after(mad_send_wr->timeout,
2337 				       temp_mad_send_wr->timeout))
2338 				break;
2339 		}
2340 	}
2341 	} else
2343 	list_add(&mad_send_wr->agent_list, list_item);
2344 
2345 	/* Reschedule a work item if we have a shorter timeout */
2346 	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
2347 		mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
2348 				 &mad_agent_priv->timed_work, delay);
2349 }
2350 
2351 void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
2352 			  int timeout_ms)
2353 {
2354 	mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2355 	wait_for_response(mad_send_wr);
2356 }
2357 
2358 /*
2359  * Process a send work completion
2360  */
2361 void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
2362 			     struct ib_mad_send_wc *mad_send_wc)
2363 {
2364 	struct ib_mad_agent_private	*mad_agent_priv;
2365 	unsigned long			flags;
2366 	int				ret;
2367 
2368 	mad_agent_priv = mad_send_wr->mad_agent_priv;
2369 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2370 	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
2371 		ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
2372 		if (ret == IB_RMPP_RESULT_CONSUMED)
2373 			goto done;
2374 	} else
2375 		ret = IB_RMPP_RESULT_UNHANDLED;
2376 
2377 	if (mad_send_wc->status != IB_WC_SUCCESS &&
2378 	    mad_send_wr->status == IB_WC_SUCCESS) {
2379 		mad_send_wr->status = mad_send_wc->status;
2380 		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2381 	}
2382 
2383 	if (--mad_send_wr->refcount > 0) {
2384 		if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
2385 		    mad_send_wr->status == IB_WC_SUCCESS) {
2386 			wait_for_response(mad_send_wr);
2387 		}
2388 		goto done;
2389 	}
2390 
2391 	/* Remove send from MAD agent and notify client of completion */
2392 	list_del(&mad_send_wr->agent_list);
2393 	adjust_timeout(mad_agent_priv);
2394 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2395 
2396 	if (mad_send_wr->status != IB_WC_SUCCESS)
2397 		mad_send_wc->status = mad_send_wr->status;
2398 	if (ret == IB_RMPP_RESULT_INTERNAL)
2399 		ib_rmpp_send_handler(mad_send_wc);
2400 	else
2401 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2402 						   mad_send_wc);
2403 
2404 	/* Release reference on agent taken when sending */
2405 	deref_mad_agent(mad_agent_priv);
2406 	return;
2407 done:
2408 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2409 }
2410 
2411 static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
2412 				     struct ib_wc *wc)
2413 {
2414 	struct ib_mad_send_wr_private	*mad_send_wr, *queued_send_wr;
2415 	struct ib_mad_list_head		*mad_list;
2416 	struct ib_mad_qp_info		*qp_info;
2417 	struct ib_mad_queue		*send_queue;
2418 	struct ib_send_wr		*bad_send_wr;
2419 	struct ib_mad_send_wc		mad_send_wc;
2420 	unsigned long flags;
2421 	int ret;
2422 
2423 	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
2424 	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2425 				   mad_list);
2426 	send_queue = mad_list->mad_queue;
2427 	qp_info = send_queue->qp_info;
2428 
2429 retry:
2430 	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
2431 			    mad_send_wr->header_mapping,
2432 			    mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
2433 	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
2434 			    mad_send_wr->payload_mapping,
2435 			    mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
2436 	queued_send_wr = NULL;
2437 	spin_lock_irqsave(&send_queue->lock, flags);
2438 	list_del(&mad_list->list);
2439 
2440 	/* Move queued send to the send queue */
2441 	if (send_queue->count-- > send_queue->max_active) {
2442 		mad_list = container_of(qp_info->overflow_list.next,
2443 					struct ib_mad_list_head, list);
2444 		queued_send_wr = container_of(mad_list,
2445 					struct ib_mad_send_wr_private,
2446 					mad_list);
2447 		list_move_tail(&mad_list->list, &send_queue->list);
2448 	}
2449 	spin_unlock_irqrestore(&send_queue->lock, flags);
2450 
2451 	mad_send_wc.send_buf = &mad_send_wr->send_buf;
2452 	mad_send_wc.status = wc->status;
2453 	mad_send_wc.vendor_err = wc->vendor_err;
2454 	if (atomic_read(&qp_info->snoop_count))
2455 		snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc,
2456 			   IB_MAD_SNOOP_SEND_COMPLETIONS);
2457 	ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
2458 
2459 	if (queued_send_wr) {
2460 		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
2461 				   &bad_send_wr);
2462 		if (ret) {
2463 			dev_err(&port_priv->device->dev,
2464 				"ib_post_send failed: %d\n", ret);
2465 			mad_send_wr = queued_send_wr;
2466 			wc->status = IB_WC_LOC_QP_OP_ERR;
2467 			goto retry;
2468 		}
2469 	}
2470 }
2471 
2472 static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
2473 {
2474 	struct ib_mad_send_wr_private *mad_send_wr;
2475 	struct ib_mad_list_head *mad_list;
2476 	unsigned long flags;
2477 
2478 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
2479 	list_for_each_entry(mad_list, &qp_info->send_queue.list, list) {
2480 		mad_send_wr = container_of(mad_list,
2481 					   struct ib_mad_send_wr_private,
2482 					   mad_list);
2483 		mad_send_wr->retry = 1;
2484 	}
2485 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
2486 }
2487 
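/*
 * Handle a completion with an error status.  Receive errors are ignored
 * here (the QP is already in the error state and will be cleaned up on
 * shutdown).  Flushed sends are reposted once if marked for retry; any
 * other send error means the QP dropped to SQE, so it is moved back to RTS
 * and the remaining posted sends are marked for retry.
 */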
2488 static void mad_error_handler(struct ib_mad_port_private *port_priv,
2489 			      struct ib_wc *wc)
2490 {
2491 	struct ib_mad_list_head *mad_list;
2492 	struct ib_mad_qp_info *qp_info;
2493 	struct ib_mad_send_wr_private *mad_send_wr;
2494 	int ret;
2495 
2496 	/* Determine if failure was a send or receive */
2497 	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
2498 	qp_info = mad_list->mad_queue->qp_info;
2499 	if (mad_list->mad_queue == &qp_info->recv_queue)
2500 		/*
2501 		 * Receive errors indicate that the QP has entered the error
2502 		 * state - error handling/shutdown code will cleanup
2503 		 */
2504 		return;
2505 
2506 	/*
2507 	 * Send errors will transition the QP to SQE - move
2508 	 * QP to RTS and repost flushed work requests
2509 	 */
2510 	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2511 				   mad_list);
2512 	if (wc->status == IB_WC_WR_FLUSH_ERR) {
2513 		if (mad_send_wr->retry) {
2514 			/* Repost send */
2515 			struct ib_send_wr *bad_send_wr;
2516 
2517 			mad_send_wr->retry = 0;
2518 			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr,
2519 					&bad_send_wr);
2520 			if (ret)
2521 				ib_mad_send_done_handler(port_priv, wc);
2522 		} else
2523 			ib_mad_send_done_handler(port_priv, wc);
2524 	} else {
2525 		struct ib_qp_attr *attr;
2526 
2527 		/* Transition QP to RTS and fail offending send */
2528 		attr = kmalloc(sizeof *attr, GFP_KERNEL);
2529 		if (attr) {
2530 			attr->qp_state = IB_QPS_RTS;
2531 			attr->cur_qp_state = IB_QPS_SQE;
2532 			ret = ib_modify_qp(qp_info->qp, attr,
2533 					   IB_QP_STATE | IB_QP_CUR_STATE);
2534 			kfree(attr);
2535 			if (ret)
2536 				dev_err(&port_priv->device->dev,
2537 					"mad_error_handler - ib_modify_qp to RTS: %d\n",
2538 					ret);
2539 			else
2540 				mark_sends_for_retry(qp_info);
2541 		}
2542 		ib_mad_send_done_handler(port_priv, wc);
2543 	}
2544 }
2545 
2546 /*
2547  * IB MAD completion callback
2548  */
2549 static void ib_mad_completion_handler(struct work_struct *work)
2550 {
2551 	struct ib_mad_port_private *port_priv;
2552 	struct ib_wc wc;
2553 
2554 	port_priv = container_of(work, struct ib_mad_port_private, work);
2555 	ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
2556 
2557 	while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
2558 		if (wc.status == IB_WC_SUCCESS) {
2559 			switch (wc.opcode) {
2560 			case IB_WC_SEND:
2561 				ib_mad_send_done_handler(port_priv, &wc);
2562 				break;
2563 			case IB_WC_RECV:
2564 				ib_mad_recv_done_handler(port_priv, &wc);
2565 				break;
2566 			default:
2567 				BUG_ON(1);
2568 				break;
2569 			}
2570 		} else
2571 			mad_error_handler(port_priv, &wc);
2572 	}
2573 }
2574 
2575 static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
2576 {
2577 	unsigned long flags;
2578 	struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
2579 	struct ib_mad_send_wc mad_send_wc;
2580 	struct list_head cancel_list;
2581 
2582 	INIT_LIST_HEAD(&cancel_list);
2583 
2584 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2585 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2586 				 &mad_agent_priv->send_list, agent_list) {
2587 		if (mad_send_wr->status == IB_WC_SUCCESS) {
2588 			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2589 			mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2590 		}
2591 	}
2592 
2593 	/* Empty wait list to prevent receives from finding a request */
2594 	list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
2595 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2596 
2597 	/* Report all cancelled requests */
2598 	mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
2599 	mad_send_wc.vendor_err = 0;
2600 
2601 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2602 				 &cancel_list, agent_list) {
2603 		mad_send_wc.send_buf = &mad_send_wr->send_buf;
2604 		list_del(&mad_send_wr->agent_list);
2605 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2606 						   &mad_send_wc);
2607 		atomic_dec(&mad_agent_priv->refcount);
2608 	}
2609 }
2610 
2611 static struct ib_mad_send_wr_private*
2612 find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
2613 	     struct ib_mad_send_buf *send_buf)
2614 {
2615 	struct ib_mad_send_wr_private *mad_send_wr;
2616 
2617 	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
2618 			    agent_list) {
2619 		if (&mad_send_wr->send_buf == send_buf)
2620 			return mad_send_wr;
2621 	}
2622 
2623 	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
2624 			    agent_list) {
2625 		if (is_rmpp_data_mad(mad_agent_priv,
2626 				     mad_send_wr->send_buf.mad) &&
2627 		    &mad_send_wr->send_buf == send_buf)
2628 			return mad_send_wr;
2629 	}
2630 	return NULL;
2631 }
2632 
2633 int ib_modify_mad(struct ib_mad_agent *mad_agent,
2634 		  struct ib_mad_send_buf *send_buf, u32 timeout_ms)
2635 {
2636 	struct ib_mad_agent_private *mad_agent_priv;
2637 	struct ib_mad_send_wr_private *mad_send_wr;
2638 	unsigned long flags;
2639 	int active;
2640 
2641 	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
2642 				      agent);
2643 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2644 	mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
2645 	if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
2646 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2647 		return -EINVAL;
2648 	}
2649 
2650 	active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1);
2651 	if (!timeout_ms) {
2652 		mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2653 		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2654 	}
2655 
2656 	mad_send_wr->send_buf.timeout_ms = timeout_ms;
2657 	if (active)
2658 		mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2659 	else
2660 		ib_reset_mad_timeout(mad_send_wr, timeout_ms);
2661 
2662 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2663 	return 0;
2664 }
2665 EXPORT_SYMBOL(ib_modify_mad);
2666 
2667 void ib_cancel_mad(struct ib_mad_agent *mad_agent,
2668 		   struct ib_mad_send_buf *send_buf)
2669 {
2670 	ib_modify_mad(mad_agent, send_buf, 0);
2671 }
2672 EXPORT_SYMBOL(ib_cancel_mad);
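/*
 * Illustrative usage sketch (not part of this file): a client that posted a
 * request with ib_post_send_mad() can abort it with
 *
 *	ib_cancel_mad(agent, send_buf);
 *
 * which is equivalent to ib_modify_mad(agent, send_buf, 0).  The canceled
 * send is then reported through the agent's send_handler with status
 * IB_WC_WR_FLUSH_ERR.
 */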
2673 
2674 static void local_completions(struct work_struct *work)
2675 {
2676 	struct ib_mad_agent_private *mad_agent_priv;
2677 	struct ib_mad_local_private *local;
2678 	struct ib_mad_agent_private *recv_mad_agent;
2679 	unsigned long flags;
2680 	int free_mad;
2681 	struct ib_wc wc;
2682 	struct ib_mad_send_wc mad_send_wc;
2683 	bool opa;
2684 
2685 	mad_agent_priv =
2686 		container_of(work, struct ib_mad_agent_private, local_work);
2687 
2688 	opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device,
2689 			       mad_agent_priv->qp_info->port_priv->port_num);
2690 
2691 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2692 	while (!list_empty(&mad_agent_priv->local_list)) {
2693 		local = list_entry(mad_agent_priv->local_list.next,
2694 				   struct ib_mad_local_private,
2695 				   completion_list);
2696 		list_del(&local->completion_list);
2697 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2698 		free_mad = 0;
2699 		if (local->mad_priv) {
2700 			u8 base_version;
2701 			recv_mad_agent = local->recv_mad_agent;
2702 			if (!recv_mad_agent) {
2703 				dev_err(&mad_agent_priv->agent.device->dev,
2704 					"No receive MAD agent for local completion\n");
2705 				free_mad = 1;
2706 				goto local_send_completion;
2707 			}
2708 
2709 			/*
2710 			 * Defined behavior is to complete response
2711 			 * before request
2712 			 */
2713 			build_smp_wc(recv_mad_agent->agent.qp,
2714 				     (unsigned long) local->mad_send_wr,
2715 				     be16_to_cpu(IB_LID_PERMISSIVE),
2716 				     local->mad_send_wr->send_wr.wr.ud.pkey_index,
2717 				     recv_mad_agent->agent.port_num, &wc);
2718 
2719 			local->mad_priv->header.recv_wc.wc = &wc;
2720 
2721 			base_version = ((struct ib_mad_hdr *)(local->mad_priv->mad))->base_version;
2722 			if (opa && base_version == OPA_MGMT_BASE_VERSION) {
2723 				local->mad_priv->header.recv_wc.mad_len = local->return_wc_byte_len;
2724 				local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad);
2725 			} else {
2726 				local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad);
2727 				local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad);
2728 			}
2729 
2730 			INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list);
2731 			list_add(&local->mad_priv->header.recv_wc.recv_buf.list,
2732 				 &local->mad_priv->header.recv_wc.rmpp_list);
2733 			local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
2734 			local->mad_priv->header.recv_wc.recv_buf.mad =
2735 						(struct ib_mad *)local->mad_priv->mad;
2736 			if (atomic_read(&recv_mad_agent->qp_info->snoop_count))
2737 				snoop_recv(recv_mad_agent->qp_info,
2738 					  &local->mad_priv->header.recv_wc,
2739 					   IB_MAD_SNOOP_RECVS);
2740 			recv_mad_agent->agent.recv_handler(
2741 						&recv_mad_agent->agent,
2742 						&local->mad_priv->header.recv_wc);
2743 			spin_lock_irqsave(&recv_mad_agent->lock, flags);
2744 			atomic_dec(&recv_mad_agent->refcount);
2745 			spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
2746 		}
2747 
2748 local_send_completion:
2749 		/* Complete send */
2750 		mad_send_wc.status = IB_WC_SUCCESS;
2751 		mad_send_wc.vendor_err = 0;
2752 		mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
2753 		if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
2754 			snoop_send(mad_agent_priv->qp_info,
2755 				   &local->mad_send_wr->send_buf,
2756 				   &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS);
2757 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2758 						   &mad_send_wc);
2759 
2760 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2761 		atomic_dec(&mad_agent_priv->refcount);
2762 		if (free_mad)
2763 			kfree(local->mad_priv);
2764 		kfree(local);
2765 	}
2766 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2767 }
2768 
2769 static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
2770 {
2771 	int ret;
2772 
2773 	if (!mad_send_wr->retries_left)
2774 		return -ETIMEDOUT;
2775 
2776 	mad_send_wr->retries_left--;
2777 	mad_send_wr->send_buf.retries++;
2778 
2779 	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
2780 
2781 	if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
2782 		ret = ib_retry_rmpp(mad_send_wr);
2783 		switch (ret) {
2784 		case IB_RMPP_RESULT_UNHANDLED:
2785 			ret = ib_send_mad(mad_send_wr);
2786 			break;
2787 		case IB_RMPP_RESULT_CONSUMED:
2788 			ret = 0;
2789 			break;
2790 		default:
2791 			ret = -ECOMM;
2792 			break;
2793 		}
2794 	} else
2795 		ret = ib_send_mad(mad_send_wr);
2796 
2797 	if (!ret) {
2798 		mad_send_wr->refcount++;
2799 		list_add_tail(&mad_send_wr->agent_list,
2800 			      &mad_send_wr->mad_agent_priv->send_list);
2801 	}
2802 	return ret;
2803 }
2804 
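/*
 * Delayed-work handler for request timeouts.  Expired entries at the head
 * of the wait list are either retransmitted via retry_send() or completed
 * to the client with IB_WC_RESP_TIMEOUT_ERR (or the error status already
 * recorded for the send); the work item is rearmed for the next deadline.
 */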
2805 static void timeout_sends(struct work_struct *work)
2806 {
2807 	struct ib_mad_agent_private *mad_agent_priv;
2808 	struct ib_mad_send_wr_private *mad_send_wr;
2809 	struct ib_mad_send_wc mad_send_wc;
2810 	unsigned long flags, delay;
2811 
2812 	mad_agent_priv = container_of(work, struct ib_mad_agent_private,
2813 				      timed_work.work);
2814 	mad_send_wc.vendor_err = 0;
2815 
2816 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2817 	while (!list_empty(&mad_agent_priv->wait_list)) {
2818 		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2819 					 struct ib_mad_send_wr_private,
2820 					 agent_list);
2821 
2822 		if (time_after(mad_send_wr->timeout, jiffies)) {
2823 			delay = mad_send_wr->timeout - jiffies;
2824 			if ((long)delay <= 0)
2825 				delay = 1;
2826 			queue_delayed_work(mad_agent_priv->qp_info->
2827 					   port_priv->wq,
2828 					   &mad_agent_priv->timed_work, delay);
2829 			break;
2830 		}
2831 
2832 		list_del(&mad_send_wr->agent_list);
2833 		if (mad_send_wr->status == IB_WC_SUCCESS &&
2834 		    !retry_send(mad_send_wr))
2835 			continue;
2836 
2837 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2838 
2839 		if (mad_send_wr->status == IB_WC_SUCCESS)
2840 			mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
2841 		else
2842 			mad_send_wc.status = mad_send_wr->status;
2843 		mad_send_wc.send_buf = &mad_send_wr->send_buf;
2844 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2845 						   &mad_send_wc);
2846 
2847 		atomic_dec(&mad_agent_priv->refcount);
2848 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2849 	}
2850 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2851 }
2852 
2853 static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
2854 {
2855 	struct ib_mad_port_private *port_priv = cq->cq_context;
2856 	unsigned long flags;
2857 
2858 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2859 	if (!list_empty(&port_priv->port_list))
2860 		queue_work(port_priv->wq, &port_priv->work);
2861 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2862 }
2863 
2864 /*
2865  * Allocate receive MADs and post receive WRs for them
2866  */
2867 static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
2868 				    struct ib_mad_private *mad)
2869 {
2870 	unsigned long flags;
2871 	int post, ret;
2872 	struct ib_mad_private *mad_priv;
2873 	struct ib_sge sg_list;
2874 	struct ib_recv_wr recv_wr, *bad_recv_wr;
2875 	struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
2876 
2877 	/* Initialize common scatter list fields */
2878 	sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey;
2879 
2880 	/* Initialize common receive WR fields */
2881 	recv_wr.next = NULL;
2882 	recv_wr.sg_list = &sg_list;
2883 	recv_wr.num_sge = 1;
2884 
2885 	do {
2886 		/* Allocate and map receive buffer */
2887 		if (mad) {
2888 			mad_priv = mad;
2889 			mad = NULL;
2890 		} else {
2891 			mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv),
2892 						     GFP_ATOMIC);
2893 			if (!mad_priv) {
2894 				dev_err(&qp_info->port_priv->device->dev,
2895 					"No memory for receive buffer\n");
2896 				ret = -ENOMEM;
2897 				break;
2898 			}
2899 		}
2900 		sg_list.length = mad_priv_dma_size(mad_priv);
2901 		sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
2902 						 &mad_priv->grh,
2903 						 mad_priv_dma_size(mad_priv),
2904 						 DMA_FROM_DEVICE);
2905 		if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
2906 						  sg_list.addr))) {
2907 			ret = -ENOMEM;
2908 			break;
2909 		}
2910 		mad_priv->header.mapping = sg_list.addr;
2911 		recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
2912 		mad_priv->header.mad_list.mad_queue = recv_queue;
2913 
2914 		/* Post receive WR */
2915 		spin_lock_irqsave(&recv_queue->lock, flags);
2916 		post = (++recv_queue->count < recv_queue->max_active);
2917 		list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
2918 		spin_unlock_irqrestore(&recv_queue->lock, flags);
2919 		ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr);
2920 		if (ret) {
2921 			spin_lock_irqsave(&recv_queue->lock, flags);
2922 			list_del(&mad_priv->header.mad_list.list);
2923 			recv_queue->count--;
2924 			spin_unlock_irqrestore(&recv_queue->lock, flags);
2925 			ib_dma_unmap_single(qp_info->port_priv->device,
2926 					    mad_priv->header.mapping,
2927 					    mad_priv_dma_size(mad_priv),
2928 					    DMA_FROM_DEVICE);
2929 			kfree(mad_priv);
2930 			dev_err(&qp_info->port_priv->device->dev,
2931 				"ib_post_recv failed: %d\n", ret);
2932 			break;
2933 		}
2934 	} while (post);
2935 
2936 	return ret;
2937 }
2938 
2939 /*
2940  * Return all the posted receive MADs
2941  */
2942 static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info)
2943 {
2944 	struct ib_mad_private_header *mad_priv_hdr;
2945 	struct ib_mad_private *recv;
2946 	struct ib_mad_list_head *mad_list;
2947 
2948 	if (!qp_info->qp)
2949 		return;
2950 
2951 	while (!list_empty(&qp_info->recv_queue.list)) {
2953 		mad_list = list_entry(qp_info->recv_queue.list.next,
2954 				      struct ib_mad_list_head, list);
2955 		mad_priv_hdr = container_of(mad_list,
2956 					    struct ib_mad_private_header,
2957 					    mad_list);
2958 		recv = container_of(mad_priv_hdr, struct ib_mad_private,
2959 				    header);
2960 
2961 		/* Remove from posted receive MAD list */
2962 		list_del(&mad_list->list);
2963 
2964 		ib_dma_unmap_single(qp_info->port_priv->device,
2965 				    recv->header.mapping,
2966 				    mad_priv_dma_size(recv),
2967 				    DMA_FROM_DEVICE);
2968 		kfree(recv);
2969 	}
2970 
2971 	qp_info->recv_queue.count = 0;
2972 }
2973 
2974 /*
2975  * Start the port
2976  */
2977 static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
2978 {
2979 	int ret, i;
2980 	struct ib_qp_attr *attr;
2981 	struct ib_qp *qp;
2982 	u16 pkey_index;
2983 
2984 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
2985 	if (!attr) {
2986 		dev_err(&port_priv->device->dev,
2987 			"Couldn't kmalloc ib_qp_attr\n");
2988 		return -ENOMEM;
2989 	}
2990 
2991 	ret = ib_find_pkey(port_priv->device, port_priv->port_num,
2992 			   IB_DEFAULT_PKEY_FULL, &pkey_index);
2993 	if (ret)
2994 		pkey_index = 0;
2995 
2996 	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2997 		qp = port_priv->qp_info[i].qp;
2998 		if (!qp)
2999 			continue;
3000 
3001 		/*
3002 		 * PKey index for QP1 is irrelevant but
3003 		 * one is needed for the Reset to Init transition
3004 		 */
3005 		attr->qp_state = IB_QPS_INIT;
3006 		attr->pkey_index = pkey_index;
3007 		attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
3008 		ret = ib_modify_qp(qp, attr, IB_QP_STATE |
3009 					     IB_QP_PKEY_INDEX | IB_QP_QKEY);
3010 		if (ret) {
3011 			dev_err(&port_priv->device->dev,
3012 				"Couldn't change QP%d state to INIT: %d\n",
3013 				i, ret);
3014 			goto out;
3015 		}
3016 
3017 		attr->qp_state = IB_QPS_RTR;
3018 		ret = ib_modify_qp(qp, attr, IB_QP_STATE);
3019 		if (ret) {
3020 			dev_err(&port_priv->device->dev,
3021 				"Couldn't change QP%d state to RTR: %d\n",
3022 				i, ret);
3023 			goto out;
3024 		}
3025 
3026 		attr->qp_state = IB_QPS_RTS;
3027 		attr->sq_psn = IB_MAD_SEND_Q_PSN;
3028 		ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
3029 		if (ret) {
3030 			dev_err(&port_priv->device->dev,
3031 				"Couldn't change QP%d state to RTS: %d\n",
3032 				i, ret);
3033 			goto out;
3034 		}
3035 	}
3036 
3037 	ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
3038 	if (ret) {
3039 		dev_err(&port_priv->device->dev,
3040 			"Failed to request completion notification: %d\n",
3041 			ret);
3042 		goto out;
3043 	}
3044 
3045 	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
3046 		if (!port_priv->qp_info[i].qp)
3047 			continue;
3048 
3049 		ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
3050 		if (ret) {
3051 			dev_err(&port_priv->device->dev,
3052 				"Couldn't post receive WRs\n");
3053 			goto out;
3054 		}
3055 	}
3056 out:
3057 	kfree(attr);
3058 	return ret;
3059 }
3060 
3061 static void qp_event_handler(struct ib_event *event, void *qp_context)
3062 {
3063 	struct ib_mad_qp_info	*qp_info = qp_context;
3064 
3065 	/* It's worse than that! He's dead, Jim! */
3066 	dev_err(&qp_info->port_priv->device->dev,
3067 		"Fatal error (%d) on MAD QP (%d)\n",
3068 		event->event, qp_info->qp->qp_num);
3069 }
3070 
3071 static void init_mad_queue(struct ib_mad_qp_info *qp_info,
3072 			   struct ib_mad_queue *mad_queue)
3073 {
3074 	mad_queue->qp_info = qp_info;
3075 	mad_queue->count = 0;
3076 	spin_lock_init(&mad_queue->lock);
3077 	INIT_LIST_HEAD(&mad_queue->list);
3078 }
3079 
3080 static void init_mad_qp(struct ib_mad_port_private *port_priv,
3081 			struct ib_mad_qp_info *qp_info)
3082 {
3083 	qp_info->port_priv = port_priv;
3084 	init_mad_queue(qp_info, &qp_info->send_queue);
3085 	init_mad_queue(qp_info, &qp_info->recv_queue);
3086 	INIT_LIST_HEAD(&qp_info->overflow_list);
3087 	spin_lock_init(&qp_info->snoop_lock);
3088 	qp_info->snoop_table = NULL;
3089 	qp_info->snoop_table_size = 0;
3090 	atomic_set(&qp_info->snoop_count, 0);
3091 }
3092 
3093 static int create_mad_qp(struct ib_mad_qp_info *qp_info,
3094 			 enum ib_qp_type qp_type)
3095 {
3096 	struct ib_qp_init_attr	qp_init_attr;
3097 	int ret;
3098 
3099 	memset(&qp_init_attr, 0, sizeof qp_init_attr);
3100 	qp_init_attr.send_cq = qp_info->port_priv->cq;
3101 	qp_init_attr.recv_cq = qp_info->port_priv->cq;
3102 	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
3103 	qp_init_attr.cap.max_send_wr = mad_sendq_size;
3104 	qp_init_attr.cap.max_recv_wr = mad_recvq_size;
3105 	qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
3106 	qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
3107 	qp_init_attr.qp_type = qp_type;
3108 	qp_init_attr.port_num = qp_info->port_priv->port_num;
3109 	qp_init_attr.qp_context = qp_info;
3110 	qp_init_attr.event_handler = qp_event_handler;
3111 	qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
3112 	if (IS_ERR(qp_info->qp)) {
3113 		dev_err(&qp_info->port_priv->device->dev,
3114 			"Couldn't create ib_mad QP%d\n",
3115 			get_spl_qp_index(qp_type));
3116 		ret = PTR_ERR(qp_info->qp);
3117 		goto error;
3118 	}
3119 	/* Use minimum queue sizes unless the CQ is resized */
3120 	qp_info->send_queue.max_active = mad_sendq_size;
3121 	qp_info->recv_queue.max_active = mad_recvq_size;
3122 	return 0;
3123 
3124 error:
3125 	return ret;
3126 }
3127 
3128 static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
3129 {
3130 	if (!qp_info->qp)
3131 		return;
3132 
3133 	ib_destroy_qp(qp_info->qp);
3134 	kfree(qp_info->snoop_table);
3135 }
3136 
3137 /*
3138  * Open the port
3139  * Create the QP, PD, MR, and CQ if needed
3140  */
3141 static int ib_mad_port_open(struct ib_device *device,
3142 			    int port_num)
3143 {
3144 	int ret, cq_size;
3145 	struct ib_mad_port_private *port_priv;
3146 	unsigned long flags;
3147 	char name[sizeof "ib_mad123"];
3148 	int has_smi;
3149 	struct ib_cq_init_attr cq_attr = {};
3150 
3151 	if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
3152 		return -EFAULT;
3153 
3154 	if (WARN_ON(rdma_cap_opa_mad(device, port_num) &&
3155 		    rdma_max_mad_size(device, port_num) < OPA_MGMT_MAD_SIZE))
3156 		return -EFAULT;
3157 
3158 	/* Create new device info */
3159 	port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
3160 	if (!port_priv) {
3161 		dev_err(&device->dev, "No memory for ib_mad_port_private\n");
3162 		return -ENOMEM;
3163 	}
3164 
3165 	port_priv->device = device;
3166 	port_priv->port_num = port_num;
3167 	spin_lock_init(&port_priv->reg_lock);
3168 	INIT_LIST_HEAD(&port_priv->agent_list);
3169 	init_mad_qp(port_priv, &port_priv->qp_info[0]);
3170 	init_mad_qp(port_priv, &port_priv->qp_info[1]);
3171 
3172 	cq_size = mad_sendq_size + mad_recvq_size;
3173 	has_smi = rdma_cap_ib_smi(device, port_num);
3174 	if (has_smi)
3175 		cq_size *= 2;
3176 
3177 	cq_attr.cqe = cq_size;
3178 	port_priv->cq = ib_create_cq(port_priv->device,
3179 				     ib_mad_thread_completion_handler,
3180 				     NULL, port_priv, &cq_attr);
3181 	if (IS_ERR(port_priv->cq)) {
3182 		dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
3183 		ret = PTR_ERR(port_priv->cq);
3184 		goto error3;
3185 	}
3186 
3187 	port_priv->pd = ib_alloc_pd(device);
3188 	if (IS_ERR(port_priv->pd)) {
3189 		dev_err(&device->dev, "Couldn't create ib_mad PD\n");
3190 		ret = PTR_ERR(port_priv->pd);
3191 		goto error4;
3192 	}
3193 
3194 	if (has_smi) {
3195 		ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI);
3196 		if (ret)
3197 			goto error6;
3198 	}
3199 	ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
3200 	if (ret)
3201 		goto error7;
3202 
3203 	snprintf(name, sizeof name, "ib_mad%d", port_num);
3204 	port_priv->wq = create_singlethread_workqueue(name);
3205 	if (!port_priv->wq) {
3206 		ret = -ENOMEM;
3207 		goto error8;
3208 	}
3209 	INIT_WORK(&port_priv->work, ib_mad_completion_handler);
3210 
3211 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
3212 	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
3213 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3214 
3215 	ret = ib_mad_port_start(port_priv);
3216 	if (ret) {
3217 		dev_err(&device->dev, "Couldn't start port\n");
3218 		goto error9;
3219 	}
3220 
3221 	return 0;
3222 
3223 error9:
3224 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
3225 	list_del_init(&port_priv->port_list);
3226 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3227 
3228 	destroy_workqueue(port_priv->wq);
3229 error8:
3230 	destroy_mad_qp(&port_priv->qp_info[1]);
3231 error7:
3232 	destroy_mad_qp(&port_priv->qp_info[0]);
3233 error6:
3234 	ib_dealloc_pd(port_priv->pd);
3235 error4:
3236 	ib_destroy_cq(port_priv->cq);
3237 	cleanup_recv_queue(&port_priv->qp_info[1]);
3238 	cleanup_recv_queue(&port_priv->qp_info[0]);
3239 error3:
3240 	kfree(port_priv);
3241 
3242 	return ret;
3243 }
3244 
3245 /*
3246  * Close the port
3247  * If there are no classes using the port, free the port
3248  * resources (CQ, MR, PD, QP) and remove the port's info structure
3249  */
3250 static int ib_mad_port_close(struct ib_device *device, int port_num)
3251 {
3252 	struct ib_mad_port_private *port_priv;
3253 	unsigned long flags;
3254 
3255 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
3256 	port_priv = __ib_get_mad_port(device, port_num);
3257 	if (port_priv == NULL) {
3258 		spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3259 		dev_err(&device->dev, "Port %d not found\n", port_num);
3260 		return -ENODEV;
3261 	}
3262 	list_del_init(&port_priv->port_list);
3263 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3264 
3265 	destroy_workqueue(port_priv->wq);
3266 	destroy_mad_qp(&port_priv->qp_info[1]);
3267 	destroy_mad_qp(&port_priv->qp_info[0]);
3268 	ib_dealloc_pd(port_priv->pd);
3269 	ib_destroy_cq(port_priv->cq);
3270 	cleanup_recv_queue(&port_priv->qp_info[1]);
3271 	cleanup_recv_queue(&port_priv->qp_info[0]);
3272 	/* XXX: Handle deallocation of MAD registration tables */
3273 
3274 	kfree(port_priv);
3275 
3276 	return 0;
3277 }
3278 
3279 static void ib_mad_init_device(struct ib_device *device)
3280 {
3281 	int start, i;
3282 
3283 	start = rdma_start_port(device);
3284 
3285 	for (i = start; i <= rdma_end_port(device); i++) {
3286 		if (!rdma_cap_ib_mad(device, i))
3287 			continue;
3288 
3289 		if (ib_mad_port_open(device, i)) {
3290 			dev_err(&device->dev, "Couldn't open port %d\n", i);
3291 			goto error;
3292 		}
3293 		if (ib_agent_port_open(device, i)) {
3294 			dev_err(&device->dev,
3295 				"Couldn't open port %d for agents\n", i);
3296 			goto error_agent;
3297 		}
3298 	}
3299 	return;
3300 
3301 error_agent:
3302 	if (ib_mad_port_close(device, i))
3303 		dev_err(&device->dev, "Couldn't close port %d\n", i);
3304 
3305 error:
3306 	while (--i >= start) {
3307 		if (!rdma_cap_ib_mad(device, i))
3308 			continue;
3309 
3310 		if (ib_agent_port_close(device, i))
3311 			dev_err(&device->dev,
3312 				"Couldn't close port %d for agents\n", i);
3313 		if (ib_mad_port_close(device, i))
3314 			dev_err(&device->dev, "Couldn't close port %d\n", i);
3315 	}
3316 }
3317 
3318 static void ib_mad_remove_device(struct ib_device *device, void *client_data)
3319 {
3320 	int i;
3321 
3322 	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
3323 		if (!rdma_cap_ib_mad(device, i))
3324 			continue;
3325 
3326 		if (ib_agent_port_close(device, i))
3327 			dev_err(&device->dev,
3328 				"Couldn't close port %d for agents\n", i);
3329 		if (ib_mad_port_close(device, i))
3330 			dev_err(&device->dev, "Couldn't close port %d\n", i);
3331 	}
3332 }
3333 
3334 static struct ib_client mad_client = {
3335 	.name   = "mad",
3336 	.add = ib_mad_init_device,
3337 	.remove = ib_mad_remove_device
3338 };
3339 
3340 static int __init ib_mad_init_module(void)
3341 {
3342 	mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
3343 	mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
3344 
3345 	mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
3346 	mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
3347 
3348 	INIT_LIST_HEAD(&ib_mad_port_list);
3349 
3350 	if (ib_register_client(&mad_client)) {
3351 		pr_err("Couldn't register ib_mad client\n");
3352 		return -EINVAL;
3353 	}
3354 
3355 	return 0;
3356 }
3357 
3358 static void __exit ib_mad_cleanup_module(void)
3359 {
3360 	ib_unregister_client(&mad_client);
3361 }
3362 
3363 module_init(ib_mad_init_module);
3364 module_exit(ib_mad_cleanup_module);
3365