/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: sa_query.c 1389 2004-12-27 22:56:47Z roland $
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/random.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/idr.h>

#include <ib_pack.h>
#include <ib_sa.h>

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand subnet administration query support");
MODULE_LICENSE("Dual BSD/GPL");

/*
 * These two structures must be packed because they have 64-bit fields
 * that are only 32-bit aligned.  64-bit architectures will lay them
 * out wrong otherwise.  (And unfortunately they are sent on the
 * wire, so we can't change the layout.)
 */
struct ib_sa_hdr {
	u64			sm_key;
	u16			attr_offset;
	u16			reserved;
	ib_sa_comp_mask		comp_mask;
} __attribute__ ((packed));

struct ib_sa_mad {
	struct ib_mad_hdr	mad_hdr;
	struct ib_rmpp_hdr	rmpp_hdr;
	struct ib_sa_hdr	sa_hdr;
	u8			data[200];
} __attribute__ ((packed));

struct ib_sa_sm_ah {
	struct ib_ah        *ah;
	struct kref          ref;
};

struct ib_sa_port {
	struct ib_mad_agent *agent;
	struct ib_mr        *mr;
	struct ib_sa_sm_ah  *sm_ah;
	struct work_struct   update_task;
	spinlock_t           ah_lock;
	u8                   port_num;
};

struct ib_sa_device {
	int                     start_port, end_port;
	struct ib_event_handler event_handler;
	struct ib_sa_port port[0];
};

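/*
 * Common state for an outstanding query, embedded in a
 * query-type-specific structure such as struct ib_sa_path_query.
 * The id allocated from query_idr doubles as the send WR's wr_id, so
 * the MAD send and receive handlers can find the query again, and
 * release() frees the containing structure once the query is done.
 */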
struct ib_sa_query {
	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
	void (*release)(struct ib_sa_query *);
	struct ib_sa_port  *port;
	struct ib_sa_mad   *mad;
	struct ib_sa_sm_ah *sm_ah;
	DECLARE_PCI_UNMAP_ADDR(mapping)
	int                 id;
};

struct ib_sa_path_query {
	void (*callback)(int, struct ib_sa_path_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
};

struct ib_sa_mcmember_query {
	void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
};

static void ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device);

static struct ib_client sa_client = {
	.name   = "sa",
	.add    = ib_sa_add_one,
	.remove = ib_sa_remove_one
};

static spinlock_t idr_lock;
static DEFINE_IDR(query_idr);

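/*
 * The low 32 bits of each MAD's transaction ID come from this
 * counter, which is seeded with random bytes at module load; the
 * high 32 bits come from the MAD agent's hi_tid.  See init_mad().
 */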
static spinlock_t tid_lock;
static u32 tid;

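/* Subnet administration attribute IDs, from the InfiniBand spec. */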
enum {
	IB_SA_ATTR_CLASS_PORTINFO    = 0x01,
	IB_SA_ATTR_NOTICE	     = 0x02,
	IB_SA_ATTR_INFORM_INFO	     = 0x03,
	IB_SA_ATTR_NODE_REC	     = 0x11,
	IB_SA_ATTR_PORT_INFO_REC     = 0x12,
	IB_SA_ATTR_SL2VL_REC	     = 0x13,
	IB_SA_ATTR_SWITCH_REC	     = 0x14,
	IB_SA_ATTR_LINEAR_FDB_REC    = 0x15,
	IB_SA_ATTR_RANDOM_FDB_REC    = 0x16,
	IB_SA_ATTR_MCAST_FDB_REC     = 0x17,
	IB_SA_ATTR_SM_INFO_REC	     = 0x18,
	IB_SA_ATTR_LINK_REC	     = 0x20,
	IB_SA_ATTR_GUID_INFO_REC     = 0x30,
	IB_SA_ATTR_SERVICE_REC	     = 0x31,
	IB_SA_ATTR_PARTITION_REC     = 0x33,
	IB_SA_ATTR_RANGE_REC	     = 0x34,
	IB_SA_ATTR_PATH_REC	     = 0x35,
	IB_SA_ATTR_VL_ARB_REC	     = 0x36,
	IB_SA_ATTR_MC_GROUP_REC	     = 0x37,
	IB_SA_ATTR_MC_MEMBER_REC     = 0x38,
	IB_SA_ATTR_TRACE_REC	     = 0x39,
	IB_SA_ATTR_MULTI_PATH_REC    = 0x3a,
	IB_SA_ATTR_SERVICE_ASSOC_REC = 0x3b
};

#define PATH_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_path_rec, field),		\
	.struct_size_bytes   = sizeof ((struct ib_sa_path_rec *) 0)->field,	\
	.field_name          = "sa_path_rec:" #field

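/*
 * The tables below describe the wire layout of the records we query:
 * each entry gives a field's position within the MAD data as 32-bit
 * word/bit offsets plus a size in bits, for use with ib_pack() and
 * ib_unpack().
 */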
static const struct ib_field path_rec_table[] = {
	{ RESERVED,
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ RESERVED,
	  .offset_words = 1,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ PATH_REC_FIELD(dgid),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ PATH_REC_FIELD(sgid),
	  .offset_words = 6,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ PATH_REC_FIELD(dlid),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ PATH_REC_FIELD(slid),
	  .offset_words = 10,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ PATH_REC_FIELD(raw_traffic),
	  .offset_words = 11,
	  .offset_bits  = 0,
	  .size_bits    = 1 },
	{ RESERVED,
	  .offset_words = 11,
	  .offset_bits  = 1,
	  .size_bits    = 3 },
	{ PATH_REC_FIELD(flow_label),
	  .offset_words = 11,
	  .offset_bits  = 4,
	  .size_bits    = 20 },
	{ PATH_REC_FIELD(hop_limit),
	  .offset_words = 11,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ PATH_REC_FIELD(traffic_class),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 8 },
	{ PATH_REC_FIELD(reversible),
	  .offset_words = 12,
	  .offset_bits  = 8,
	  .size_bits    = 1 },
	{ PATH_REC_FIELD(numb_path),
	  .offset_words = 12,
	  .offset_bits  = 9,
	  .size_bits    = 7 },
	{ PATH_REC_FIELD(pkey),
	  .offset_words = 12,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ RESERVED,
	  .offset_words = 13,
	  .offset_bits  = 0,
	  .size_bits    = 12 },
	{ PATH_REC_FIELD(sl),
	  .offset_words = 13,
	  .offset_bits  = 12,
	  .size_bits    = 4 },
	{ PATH_REC_FIELD(mtu_selector),
	  .offset_words = 13,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(mtu),
	  .offset_words = 13,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(rate_selector),
	  .offset_words = 13,
	  .offset_bits  = 24,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(rate),
	  .offset_words = 13,
	  .offset_bits  = 26,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(packet_life_time_selector),
	  .offset_words = 14,
	  .offset_bits  = 0,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(packet_life_time),
	  .offset_words = 14,
	  .offset_bits  = 2,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(preference),
	  .offset_words = 14,
	  .offset_bits  = 8,
	  .size_bits    = 8 },
	{ RESERVED,
	  .offset_words = 14,
	  .offset_bits  = 16,
	  .size_bits    = 48 },
};

#define MCMEMBER_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field),	\
	.struct_size_bytes   = sizeof ((struct ib_sa_mcmember_rec *) 0)->field,	\
	.field_name          = "sa_mcmember_rec:" #field

static const struct ib_field mcmember_rec_table[] = {
	{ MCMEMBER_REC_FIELD(mgid),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ MCMEMBER_REC_FIELD(port_gid),
	  .offset_words = 4,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ MCMEMBER_REC_FIELD(qkey),
	  .offset_words = 8,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ MCMEMBER_REC_FIELD(mlid),
	  .offset_words = 9,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ MCMEMBER_REC_FIELD(mtu_selector),
	  .offset_words = 9,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(mtu),
	  .offset_words = 9,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(traffic_class),
	  .offset_words = 9,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ MCMEMBER_REC_FIELD(pkey),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ MCMEMBER_REC_FIELD(rate_selector),
	  .offset_words = 10,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(rate),
	  .offset_words = 10,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(packet_life_time_selector),
	  .offset_words = 10,
	  .offset_bits  = 24,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(packet_life_time),
	  .offset_words = 10,
	  .offset_bits  = 26,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(sl),
	  .offset_words = 11,
	  .offset_bits  = 0,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(flow_label),
	  .offset_words = 11,
	  .offset_bits  = 4,
	  .size_bits    = 20 },
	{ MCMEMBER_REC_FIELD(hop_limit),
	  .offset_words = 11,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ MCMEMBER_REC_FIELD(scope),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(join_state),
	  .offset_words = 12,
	  .offset_bits  = 4,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(proxy_join),
	  .offset_words = 12,
	  .offset_bits  = 8,
	  .size_bits    = 1 },
	{ RESERVED,
	  .offset_words = 12,
	  .offset_bits  = 9,
	  .size_bits    = 23 },
};

static void free_sm_ah(struct kref *kref)
{
	struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);

	ib_destroy_ah(sm_ah->ah);
	kfree(sm_ah);
}

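/*
 * Work function, scheduled from ib_sa_event(), that refreshes our
 * cached address handle to the subnet manager after a port or SM
 * change.  Queries already in flight hold a reference on the old AH,
 * which is freed once the last such query drops it.
 */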
static void update_sm_ah(void *port_ptr)
{
	struct ib_sa_port *port = port_ptr;
	struct ib_sa_sm_ah *new_ah, *old_ah;
	struct ib_port_attr port_attr;
	struct ib_ah_attr   ah_attr;

	if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
		printk(KERN_WARNING "Couldn't query port\n");
		return;
	}

	new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
	if (!new_ah) {
		printk(KERN_WARNING "Couldn't allocate new SM AH\n");
		return;
	}

	kref_init(&new_ah->ref);

	memset(&ah_attr, 0, sizeof ah_attr);
	ah_attr.dlid     = port_attr.sm_lid;
	ah_attr.sl       = port_attr.sm_sl;
	ah_attr.port_num = port->port_num;

	new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
	if (IS_ERR(new_ah->ah)) {
		printk(KERN_WARNING "Couldn't create new SM AH\n");
		kfree(new_ah);
		return;
	}

	spin_lock_irq(&port->ah_lock);
	old_ah = port->sm_ah;
	port->sm_ah = new_ah;
	spin_unlock_irq(&port->ah_lock);

	if (old_ah)
		kref_put(&old_ah->ref, free_sm_ah);
}

static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
{
	if (event->event == IB_EVENT_PORT_ERR    ||
	    event->event == IB_EVENT_PORT_ACTIVE ||
	    event->event == IB_EVENT_LID_CHANGE  ||
	    event->event == IB_EVENT_PKEY_CHANGE ||
	    event->event == IB_EVENT_SM_CHANGE) {
		struct ib_sa_device *sa_dev =
			ib_get_client_data(event->device, &sa_client);

		schedule_work(&sa_dev->port[event->element.port_num -
					    sa_dev->start_port].update_task);
	}
}

/**
 * ib_sa_cancel_query - try to cancel an SA query
 * @id:ID of query to cancel
 * @query:query pointer to cancel
 *
 * Try to cancel an SA query.  If the id and query don't match up or
 * the query has already completed, nothing is done.  Otherwise the
 * query is canceled and will complete with a status of -EINTR.
 */
void ib_sa_cancel_query(int id, struct ib_sa_query *query)
{
	unsigned long flags;
	struct ib_mad_agent *agent;

	spin_lock_irqsave(&idr_lock, flags);
	if (idr_find(&query_idr, id) != query) {
		spin_unlock_irqrestore(&idr_lock, flags);
		return;
	}
	agent = query->port->agent;
	spin_unlock_irqrestore(&idr_lock, flags);

	ib_cancel_mad(agent, id);
}
EXPORT_SYMBOL(ib_sa_cancel_query);

static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
{
	unsigned long flags;

	memset(mad, 0, sizeof *mad);

	mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
	mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_SUBN_ADM;
	mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;

	spin_lock_irqsave(&tid_lock, flags);
	mad->mad_hdr.tid           =
		cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
	spin_unlock_irqrestore(&tid_lock, flags);
}

static int send_mad(struct ib_sa_query *query, int timeout_ms)
{
	struct ib_sa_port *port = query->port;
	unsigned long flags;
	int ret, id;
	struct ib_sge      gather_list;
	struct ib_send_wr *bad_wr, wr = {
		.opcode      = IB_WR_SEND,
		.sg_list     = &gather_list,
		.num_sge     = 1,
		.send_flags  = IB_SEND_SIGNALED,
		.wr	     = {
			 .ud = {
				 .mad_hdr     = &query->mad->mad_hdr,
				 .remote_qpn  = 1,
				 .remote_qkey = IB_QP1_QKEY,
				 .timeout_ms  = timeout_ms
			 }
		 }
	};

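	/*
	 * Allocate a query ID: idr_pre_get() preallocates memory for
	 * the IDR, but idr_get_new() can still return -EAGAIN if
	 * another thread consumed the preallocated node first, in
	 * which case we simply retry.
	 */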
retry:
	if (!idr_pre_get(&query_idr, GFP_ATOMIC))
		return -ENOMEM;
	spin_lock_irqsave(&idr_lock, flags);
	ret = idr_get_new(&query_idr, query, &id);
	spin_unlock_irqrestore(&idr_lock, flags);
	if (ret == -EAGAIN)
		goto retry;
	if (ret)
		return ret;

	query->id = id;
	wr.wr_id  = id;

	spin_lock_irqsave(&port->ah_lock, flags);
	kref_get(&port->sm_ah->ref);
	query->sm_ah = port->sm_ah;
	wr.wr.ud.ah  = port->sm_ah->ah;
	spin_unlock_irqrestore(&port->ah_lock, flags);

	gather_list.addr   = dma_map_single(port->agent->device->dma_device,
					    query->mad,
					    sizeof (struct ib_sa_mad),
					    DMA_TO_DEVICE);
	gather_list.length = sizeof (struct ib_sa_mad);
	gather_list.lkey   = port->mr->lkey;
	pci_unmap_addr_set(query, mapping, gather_list.addr);

	ret = ib_post_send_mad(port->agent, &wr, &bad_wr);
	if (ret) {
		dma_unmap_single(port->agent->device->dma_device,
				 pci_unmap_addr(query, mapping),
				 sizeof (struct ib_sa_mad),
				 DMA_TO_DEVICE);
		kref_put(&query->sm_ah->ref, free_sm_ah);
		spin_lock_irqsave(&idr_lock, flags);
		idr_remove(&query_idr, id);
		spin_unlock_irqrestore(&idr_lock, flags);
	}

	/*
	 * It's not safe to dereference query any more, because the
	 * send may already have completed and released the query in
	 * another context.  So use the local id to return the query
	 * ID to our caller on success.
	 */
	return ret ? ret : id;
}


static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
				    int status,
				    struct ib_sa_mad *mad)
{
	struct ib_sa_path_query *query =
		container_of(sa_query, struct ib_sa_path_query, sa_query);

	if (mad) {
		struct ib_sa_path_rec rec;

		ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
			  mad->data, &rec);
		query->callback(status, &rec, query->context);
	} else
		query->callback(status, NULL, query->context);
}

static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
{
	kfree(sa_query->mad);
	kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
}

/**
 * ib_sa_path_rec_get - Start a Path get query
 * @device:device to send query on
 * @port_num: port number to send query on
 * @rec:Path Record to send in query
 * @comp_mask:component mask to send in query
 * @timeout_ms:time to wait for response
 * @gfp_mask:GFP mask to use for internal allocations
 * @callback:function called when query completes, times out or is
 * canceled
 * @context:opaque user context passed to callback
 * @sa_query:query context, used to cancel query
 *
 * Send a Path Record Get query to the SA to look up a path.  The
 * callback function will be called when the query completes (or
 * fails); status is 0 for a successful response, -EINTR if the query
 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
 * occurred sending the query.  The resp parameter of the callback is
 * only valid if status is 0.
 *
 * If the return value of ib_sa_path_rec_get() is negative, it is an
 * error code.  Otherwise it is a query ID that can be used to cancel
 * the query.
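 *
 * For example (a minimal sketch -- my_path_done and my_ctx are
 * hypothetical, and rec is assumed to have its dgid and sgid filled
 * in to match the IB_SA_PATH_REC_DGID/SGID component mask bits):
 *
 *	struct ib_sa_query *query;
 *	int id;
 *
 *	id = ib_sa_path_rec_get(device, port_num, &rec,
 *				IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID,
 *				100, GFP_KERNEL,
 *				my_path_done, my_ctx, &query);
 *	if (id < 0)
 *		return id;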
 */
int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
		       struct ib_sa_path_rec *rec,
		       ib_sa_comp_mask comp_mask,
		       int timeout_ms, int gfp_mask,
		       void (*callback)(int status,
					struct ib_sa_path_rec *resp,
					void *context),
		       void *context,
		       struct ib_sa_query **sa_query)
{
	struct ib_sa_path_query *query;
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	struct ib_sa_port   *port   = &sa_dev->port[port_num - sa_dev->start_port];
	struct ib_mad_agent *agent  = port->agent;
	int ret;

	query = kmalloc(sizeof *query, gfp_mask);
	if (!query)
		return -ENOMEM;
	query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
	if (!query->sa_query.mad) {
		kfree(query);
		return -ENOMEM;
	}

	query->callback = callback;
	query->context  = context;

	init_mad(query->sa_query.mad, agent);

	query->sa_query.callback              = ib_sa_path_rec_callback;
	query->sa_query.release               = ib_sa_path_rec_release;
	query->sa_query.port                  = port;
	query->sa_query.mad->mad_hdr.method   = IB_MGMT_METHOD_GET;
	query->sa_query.mad->mad_hdr.attr_id  = cpu_to_be16(IB_SA_ATTR_PATH_REC);
	query->sa_query.mad->sa_hdr.comp_mask = comp_mask;

	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
		rec, query->sa_query.mad->data);

	*sa_query = &query->sa_query;
	ret = send_mad(&query->sa_query, timeout_ms);
	if (ret < 0) {
		*sa_query = NULL;
		kfree(query->sa_query.mad);
		kfree(query);
	}

	/*
	 * On success send_mad() returned the query ID; don't touch
	 * query itself here, since it may already have completed and
	 * been freed.
	 */
	return ret;
}
EXPORT_SYMBOL(ib_sa_path_rec_get);

static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
					int status,
					struct ib_sa_mad *mad)
{
	struct ib_sa_mcmember_query *query =
		container_of(sa_query, struct ib_sa_mcmember_query, sa_query);

	if (mad) {
		struct ib_sa_mcmember_rec rec;

		ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
			  mad->data, &rec);
		query->callback(status, &rec, query->context);
	} else
		query->callback(status, NULL, query->context);
}

static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
{
	kfree(sa_query->mad);
	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
}

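/**
 * ib_sa_mcmember_rec_query - Start a multicast member record query
 * @device:device to send query on
 * @port_num: port number to send query on
 * @method:MAD method to use (for example IB_MGMT_METHOD_GET to query
 * an existing group, or IB_MGMT_METHOD_SET to request a join)
 * @rec:MCMember Record to send in query
 * @comp_mask:component mask to send in query
 * @timeout_ms:time to wait for response
 * @gfp_mask:GFP mask to use for internal allocations
 * @callback:function called when query completes, times out or is
 * canceled
 * @context:opaque user context passed to callback
 * @sa_query:query context, used to cancel query
 *
 * Send an MCMember Record query to the SA.  The status passed to the
 * callback and the return value have the same meaning as for
 * ib_sa_path_rec_get().
 */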
int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
			     u8 method,
			     struct ib_sa_mcmember_rec *rec,
			     ib_sa_comp_mask comp_mask,
			     int timeout_ms, int gfp_mask,
			     void (*callback)(int status,
					      struct ib_sa_mcmember_rec *resp,
					      void *context),
			     void *context,
			     struct ib_sa_query **sa_query)
{
	struct ib_sa_mcmember_query *query;
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	struct ib_sa_port   *port   = &sa_dev->port[port_num - sa_dev->start_port];
	struct ib_mad_agent *agent  = port->agent;
	int ret;

	query = kmalloc(sizeof *query, gfp_mask);
	if (!query)
		return -ENOMEM;
	query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
	if (!query->sa_query.mad) {
		kfree(query);
		return -ENOMEM;
	}

	query->callback = callback;
	query->context  = context;

	init_mad(query->sa_query.mad, agent);

	query->sa_query.callback              = ib_sa_mcmember_rec_callback;
	query->sa_query.release               = ib_sa_mcmember_rec_release;
	query->sa_query.port                  = port;
	query->sa_query.mad->mad_hdr.method   = method;
	query->sa_query.mad->mad_hdr.attr_id  = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
	query->sa_query.mad->sa_hdr.comp_mask = comp_mask;

	ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
		rec, query->sa_query.mad->data);

	*sa_query = &query->sa_query;
	ret = send_mad(&query->sa_query, timeout_ms);
	if (ret < 0) {
		*sa_query = NULL;
		kfree(query->sa_query.mad);
		kfree(query);
	}

	/*
	 * On success send_mad() returned the query ID; don't touch
	 * query itself here, since it may already have completed and
	 * been freed.
	 */
	return ret;
}
EXPORT_SYMBOL(ib_sa_mcmember_rec_query);

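/*
 * A successful query is completed from recv_handler() when the SA's
 * response arrives.  send_handler() runs for every send completion:
 * it reports timeouts, flushes and other errors via the callback, and
 * in all cases performs the final teardown (DMA unmap, AH reference
 * drop, release() and IDR removal) for the query.
 */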
static void send_handler(struct ib_mad_agent *agent,
			 struct ib_mad_send_wc *mad_send_wc)
{
	struct ib_sa_query *query;
	unsigned long flags;

	spin_lock_irqsave(&idr_lock, flags);
	query = idr_find(&query_idr, mad_send_wc->wr_id);
	spin_unlock_irqrestore(&idr_lock, flags);

	if (!query)
		return;

	switch (mad_send_wc->status) {
	case IB_WC_SUCCESS:
		/* No callback -- already got recv */
		break;
	case IB_WC_RESP_TIMEOUT_ERR:
		query->callback(query, -ETIMEDOUT, NULL);
		break;
	case IB_WC_WR_FLUSH_ERR:
		query->callback(query, -EINTR, NULL);
		break;
	default:
		query->callback(query, -EIO, NULL);
		break;
	}

	dma_unmap_single(agent->device->dma_device,
			 pci_unmap_addr(query, mapping),
			 sizeof (struct ib_sa_mad),
			 DMA_TO_DEVICE);
	kref_put(&query->sm_ah->ref, free_sm_ah);

	query->release(query);

	spin_lock_irqsave(&idr_lock, flags);
	idr_remove(&query_idr, mad_send_wc->wr_id);
	spin_unlock_irqrestore(&idr_lock, flags);
}

static void recv_handler(struct ib_mad_agent *mad_agent,
			 struct ib_mad_recv_wc *mad_recv_wc)
{
	struct ib_sa_query *query;
	unsigned long flags;

	spin_lock_irqsave(&idr_lock, flags);
	query = idr_find(&query_idr, mad_recv_wc->wc->wr_id);
	spin_unlock_irqrestore(&idr_lock, flags);

	if (query) {
		if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
			query->callback(query,
					mad_recv_wc->recv_buf.mad->mad_hdr.status ?
					-EINVAL : 0,
					(struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
		else
			query->callback(query, -EIO, NULL);
	}

	ib_free_recv_mad(mad_recv_wc);
}

static void ib_sa_add_one(struct ib_device *device)
{
	struct ib_sa_device *sa_dev;
	int s, e, i;

	if (device->node_type == IB_NODE_SWITCH)
		s = e = 0;
	else {
		s = 1;
		e = device->phys_port_cnt;
	}

	sa_dev = kmalloc(sizeof *sa_dev +
			 (e - s + 1) * sizeof (struct ib_sa_port),
			 GFP_KERNEL);
	if (!sa_dev)
		return;

	sa_dev->start_port = s;
	sa_dev->end_port   = e;

	for (i = 0; i <= e - s; ++i) {
		sa_dev->port[i].mr       = NULL;
		sa_dev->port[i].sm_ah    = NULL;
		sa_dev->port[i].port_num = i + s;
		spin_lock_init(&sa_dev->port[i].ah_lock);

		sa_dev->port[i].agent =
			ib_register_mad_agent(device, i + s, IB_QPT_GSI,
					      NULL, 0, send_handler,
					      recv_handler, sa_dev);
		if (IS_ERR(sa_dev->port[i].agent))
			goto err;

		sa_dev->port[i].mr = ib_get_dma_mr(sa_dev->port[i].agent->qp->pd,
						   IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(sa_dev->port[i].mr)) {
			ib_unregister_mad_agent(sa_dev->port[i].agent);
			goto err;
		}

		INIT_WORK(&sa_dev->port[i].update_task,
			  update_sm_ah, &sa_dev->port[i]);
	}

	ib_set_client_data(device, &sa_client, sa_dev);

	/*
	 * We register our event handler after everything is set up,
	 * and then update our cached info after the event handler is
	 * registered to avoid any problems if a port changes state
	 * during our initialization.
	 */

	INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
	if (ib_register_event_handler(&sa_dev->event_handler)) {
		/*
		 * Don't leave client data pointing at the sa_dev
		 * we're about to free.
		 */
		ib_set_client_data(device, &sa_client, NULL);
		goto err;
	}

	for (i = 0; i <= e - s; ++i)
		update_sm_ah(&sa_dev->port[i]);

	return;

err:
	while (--i >= 0) {
		ib_dereg_mr(sa_dev->port[i].mr);
		ib_unregister_mad_agent(sa_dev->port[i].agent);
	}

	kfree(sa_dev);

	return;
}

static void ib_sa_remove_one(struct ib_device *device)
{
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	int i;

	if (!sa_dev)
		return;

	ib_unregister_event_handler(&sa_dev->event_handler);

	/* Make sure no update_sm_ah() work is still running. */
	flush_scheduled_work();

	for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
		ib_unregister_mad_agent(sa_dev->port[i].agent);
		ib_dereg_mr(sa_dev->port[i].mr);
		/* sm_ah stays NULL if update_sm_ah() never succeeded */
		if (sa_dev->port[i].sm_ah)
			kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
	}

	kfree(sa_dev);
}

static int __init ib_sa_init(void)
{
	int ret;

	spin_lock_init(&idr_lock);
	spin_lock_init(&tid_lock);

	get_random_bytes(&tid, sizeof tid);

	ret = ib_register_client(&sa_client);
	if (ret)
		printk(KERN_ERR "Couldn't register ib_sa client\n");

	return ret;
}

static void __exit ib_sa_cleanup(void)
{
	ib_unregister_client(&sa_client);
}

module_init(ib_sa_init);
module_exit(ib_sa_cleanup);