1 /*
2  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
4  * Copyright (c) 2006 Intel Corporation.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  * $Id: sa_query.c 2811 2005-07-06 18:11:43Z halr $
35  */
36 
37 #include <linux/module.h>
38 #include <linux/init.h>
39 #include <linux/err.h>
40 #include <linux/random.h>
41 #include <linux/spinlock.h>
42 #include <linux/slab.h>
43 #include <linux/dma-mapping.h>
44 #include <linux/kref.h>
45 #include <linux/idr.h>
46 #include <linux/workqueue.h>
47 
48 #include <rdma/ib_pack.h>
49 #include <rdma/ib_cache.h>
50 #include "sa.h"
51 
52 MODULE_AUTHOR("Roland Dreier");
53 MODULE_DESCRIPTION("InfiniBand subnet administration query support");
54 MODULE_LICENSE("Dual BSD/GPL");
55 
56 struct ib_sa_sm_ah {
57 	struct ib_ah        *ah;
58 	struct kref          ref;
59 	u16		     pkey_index;
60 	u8		     src_path_mask;
61 };
62 
63 struct ib_sa_port {
64 	struct ib_mad_agent *agent;
65 	struct ib_sa_sm_ah  *sm_ah;
66 	struct work_struct   update_task;
67 	spinlock_t           ah_lock;
68 	u8                   port_num;
69 };
70 
71 struct ib_sa_device {
72 	int                     start_port, end_port;
73 	struct ib_event_handler event_handler;
74 	struct ib_sa_port port[0];
75 };
76 
/*
 * State common to every outstanding SA query.  It is embedded in the
 * record-specific query structures, which recover themselves with
 * container_of().
 */
struct ib_sa_query {
	/* Completion hook; NULL when the caller supplied no callback. */
	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
	/* Frees the enclosing record-specific query structure. */
	void (*release)(struct ib_sa_query *);
	struct ib_sa_client *client;		/* client charged for this query */
	struct ib_sa_port *port;		/* port the query is sent on */
	struct ib_mad_send_buf *mad_buf;	/* send buffer holding the request */
	struct ib_sa_sm_ah *sm_ah;		/* SM AH reference taken in alloc_mad() */
	int id;					/* ID allocated from query_idr */
};
86 
87 struct ib_sa_service_query {
88 	void (*callback)(int, struct ib_sa_service_rec *, void *);
89 	void *context;
90 	struct ib_sa_query sa_query;
91 };
92 
93 struct ib_sa_path_query {
94 	void (*callback)(int, struct ib_sa_path_rec *, void *);
95 	void *context;
96 	struct ib_sa_query sa_query;
97 };
98 
99 struct ib_sa_mcmember_query {
100 	void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
101 	void *context;
102 	struct ib_sa_query sa_query;
103 };
104 
105 static void ib_sa_add_one(struct ib_device *device);
106 static void ib_sa_remove_one(struct ib_device *device);
107 
108 static struct ib_client sa_client = {
109 	.name   = "sa",
110 	.add    = ib_sa_add_one,
111 	.remove = ib_sa_remove_one
112 };
113 
114 static spinlock_t idr_lock;
115 static DEFINE_IDR(query_idr);
116 
117 static spinlock_t tid_lock;
118 static u32 tid;
119 
120 #define PATH_REC_FIELD(field) \
121 	.struct_offset_bytes = offsetof(struct ib_sa_path_rec, field),		\
122 	.struct_size_bytes   = sizeof ((struct ib_sa_path_rec *) 0)->field,	\
123 	.field_name          = "sa_path_rec:" #field
124 
125 static const struct ib_field path_rec_table[] = {
126 	{ RESERVED,
127 	  .offset_words = 0,
128 	  .offset_bits  = 0,
129 	  .size_bits    = 32 },
130 	{ RESERVED,
131 	  .offset_words = 1,
132 	  .offset_bits  = 0,
133 	  .size_bits    = 32 },
134 	{ PATH_REC_FIELD(dgid),
135 	  .offset_words = 2,
136 	  .offset_bits  = 0,
137 	  .size_bits    = 128 },
138 	{ PATH_REC_FIELD(sgid),
139 	  .offset_words = 6,
140 	  .offset_bits  = 0,
141 	  .size_bits    = 128 },
142 	{ PATH_REC_FIELD(dlid),
143 	  .offset_words = 10,
144 	  .offset_bits  = 0,
145 	  .size_bits    = 16 },
146 	{ PATH_REC_FIELD(slid),
147 	  .offset_words = 10,
148 	  .offset_bits  = 16,
149 	  .size_bits    = 16 },
150 	{ PATH_REC_FIELD(raw_traffic),
151 	  .offset_words = 11,
152 	  .offset_bits  = 0,
153 	  .size_bits    = 1 },
154 	{ RESERVED,
155 	  .offset_words = 11,
156 	  .offset_bits  = 1,
157 	  .size_bits    = 3 },
158 	{ PATH_REC_FIELD(flow_label),
159 	  .offset_words = 11,
160 	  .offset_bits  = 4,
161 	  .size_bits    = 20 },
162 	{ PATH_REC_FIELD(hop_limit),
163 	  .offset_words = 11,
164 	  .offset_bits  = 24,
165 	  .size_bits    = 8 },
166 	{ PATH_REC_FIELD(traffic_class),
167 	  .offset_words = 12,
168 	  .offset_bits  = 0,
169 	  .size_bits    = 8 },
170 	{ PATH_REC_FIELD(reversible),
171 	  .offset_words = 12,
172 	  .offset_bits  = 8,
173 	  .size_bits    = 1 },
174 	{ PATH_REC_FIELD(numb_path),
175 	  .offset_words = 12,
176 	  .offset_bits  = 9,
177 	  .size_bits    = 7 },
178 	{ PATH_REC_FIELD(pkey),
179 	  .offset_words = 12,
180 	  .offset_bits  = 16,
181 	  .size_bits    = 16 },
182 	{ RESERVED,
183 	  .offset_words = 13,
184 	  .offset_bits  = 0,
185 	  .size_bits    = 12 },
186 	{ PATH_REC_FIELD(sl),
187 	  .offset_words = 13,
188 	  .offset_bits  = 12,
189 	  .size_bits    = 4 },
190 	{ PATH_REC_FIELD(mtu_selector),
191 	  .offset_words = 13,
192 	  .offset_bits  = 16,
193 	  .size_bits    = 2 },
194 	{ PATH_REC_FIELD(mtu),
195 	  .offset_words = 13,
196 	  .offset_bits  = 18,
197 	  .size_bits    = 6 },
198 	{ PATH_REC_FIELD(rate_selector),
199 	  .offset_words = 13,
200 	  .offset_bits  = 24,
201 	  .size_bits    = 2 },
202 	{ PATH_REC_FIELD(rate),
203 	  .offset_words = 13,
204 	  .offset_bits  = 26,
205 	  .size_bits    = 6 },
206 	{ PATH_REC_FIELD(packet_life_time_selector),
207 	  .offset_words = 14,
208 	  .offset_bits  = 0,
209 	  .size_bits    = 2 },
210 	{ PATH_REC_FIELD(packet_life_time),
211 	  .offset_words = 14,
212 	  .offset_bits  = 2,
213 	  .size_bits    = 6 },
214 	{ PATH_REC_FIELD(preference),
215 	  .offset_words = 14,
216 	  .offset_bits  = 8,
217 	  .size_bits    = 8 },
218 	{ RESERVED,
219 	  .offset_words = 14,
220 	  .offset_bits  = 16,
221 	  .size_bits    = 48 },
222 };
223 
224 #define MCMEMBER_REC_FIELD(field) \
225 	.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field),	\
226 	.struct_size_bytes   = sizeof ((struct ib_sa_mcmember_rec *) 0)->field,	\
227 	.field_name          = "sa_mcmember_rec:" #field
228 
229 static const struct ib_field mcmember_rec_table[] = {
230 	{ MCMEMBER_REC_FIELD(mgid),
231 	  .offset_words = 0,
232 	  .offset_bits  = 0,
233 	  .size_bits    = 128 },
234 	{ MCMEMBER_REC_FIELD(port_gid),
235 	  .offset_words = 4,
236 	  .offset_bits  = 0,
237 	  .size_bits    = 128 },
238 	{ MCMEMBER_REC_FIELD(qkey),
239 	  .offset_words = 8,
240 	  .offset_bits  = 0,
241 	  .size_bits    = 32 },
242 	{ MCMEMBER_REC_FIELD(mlid),
243 	  .offset_words = 9,
244 	  .offset_bits  = 0,
245 	  .size_bits    = 16 },
246 	{ MCMEMBER_REC_FIELD(mtu_selector),
247 	  .offset_words = 9,
248 	  .offset_bits  = 16,
249 	  .size_bits    = 2 },
250 	{ MCMEMBER_REC_FIELD(mtu),
251 	  .offset_words = 9,
252 	  .offset_bits  = 18,
253 	  .size_bits    = 6 },
254 	{ MCMEMBER_REC_FIELD(traffic_class),
255 	  .offset_words = 9,
256 	  .offset_bits  = 24,
257 	  .size_bits    = 8 },
258 	{ MCMEMBER_REC_FIELD(pkey),
259 	  .offset_words = 10,
260 	  .offset_bits  = 0,
261 	  .size_bits    = 16 },
262 	{ MCMEMBER_REC_FIELD(rate_selector),
263 	  .offset_words = 10,
264 	  .offset_bits  = 16,
265 	  .size_bits    = 2 },
266 	{ MCMEMBER_REC_FIELD(rate),
267 	  .offset_words = 10,
268 	  .offset_bits  = 18,
269 	  .size_bits    = 6 },
270 	{ MCMEMBER_REC_FIELD(packet_life_time_selector),
271 	  .offset_words = 10,
272 	  .offset_bits  = 24,
273 	  .size_bits    = 2 },
274 	{ MCMEMBER_REC_FIELD(packet_life_time),
275 	  .offset_words = 10,
276 	  .offset_bits  = 26,
277 	  .size_bits    = 6 },
278 	{ MCMEMBER_REC_FIELD(sl),
279 	  .offset_words = 11,
280 	  .offset_bits  = 0,
281 	  .size_bits    = 4 },
282 	{ MCMEMBER_REC_FIELD(flow_label),
283 	  .offset_words = 11,
284 	  .offset_bits  = 4,
285 	  .size_bits    = 20 },
286 	{ MCMEMBER_REC_FIELD(hop_limit),
287 	  .offset_words = 11,
288 	  .offset_bits  = 24,
289 	  .size_bits    = 8 },
290 	{ MCMEMBER_REC_FIELD(scope),
291 	  .offset_words = 12,
292 	  .offset_bits  = 0,
293 	  .size_bits    = 4 },
294 	{ MCMEMBER_REC_FIELD(join_state),
295 	  .offset_words = 12,
296 	  .offset_bits  = 4,
297 	  .size_bits    = 4 },
298 	{ MCMEMBER_REC_FIELD(proxy_join),
299 	  .offset_words = 12,
300 	  .offset_bits  = 8,
301 	  .size_bits    = 1 },
302 	{ RESERVED,
303 	  .offset_words = 12,
304 	  .offset_bits  = 9,
305 	  .size_bits    = 23 },
306 };
307 
308 #define SERVICE_REC_FIELD(field) \
309 	.struct_offset_bytes = offsetof(struct ib_sa_service_rec, field),	\
310 	.struct_size_bytes   = sizeof ((struct ib_sa_service_rec *) 0)->field,	\
311 	.field_name          = "sa_service_rec:" #field
312 
313 static const struct ib_field service_rec_table[] = {
314 	{ SERVICE_REC_FIELD(id),
315 	  .offset_words = 0,
316 	  .offset_bits  = 0,
317 	  .size_bits    = 64 },
318 	{ SERVICE_REC_FIELD(gid),
319 	  .offset_words = 2,
320 	  .offset_bits  = 0,
321 	  .size_bits    = 128 },
322 	{ SERVICE_REC_FIELD(pkey),
323 	  .offset_words = 6,
324 	  .offset_bits  = 0,
325 	  .size_bits    = 16 },
326 	{ SERVICE_REC_FIELD(lease),
327 	  .offset_words = 7,
328 	  .offset_bits  = 0,
329 	  .size_bits    = 32 },
330 	{ SERVICE_REC_FIELD(key),
331 	  .offset_words = 8,
332 	  .offset_bits  = 0,
333 	  .size_bits    = 128 },
334 	{ SERVICE_REC_FIELD(name),
335 	  .offset_words = 12,
336 	  .offset_bits  = 0,
337 	  .size_bits    = 64*8 },
338 	{ SERVICE_REC_FIELD(data8),
339 	  .offset_words = 28,
340 	  .offset_bits  = 0,
341 	  .size_bits    = 16*8 },
342 	{ SERVICE_REC_FIELD(data16),
343 	  .offset_words = 32,
344 	  .offset_bits  = 0,
345 	  .size_bits    = 8*16 },
346 	{ SERVICE_REC_FIELD(data32),
347 	  .offset_words = 36,
348 	  .offset_bits  = 0,
349 	  .size_bits    = 4*32 },
350 	{ SERVICE_REC_FIELD(data64),
351 	  .offset_words = 40,
352 	  .offset_bits  = 0,
353 	  .size_bits    = 2*64 },
354 };
355 
356 static void free_sm_ah(struct kref *kref)
357 {
358 	struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
359 
360 	ib_destroy_ah(sm_ah->ah);
361 	kfree(sm_ah);
362 }
363 
364 static void update_sm_ah(struct work_struct *work)
365 {
366 	struct ib_sa_port *port =
367 		container_of(work, struct ib_sa_port, update_task);
368 	struct ib_sa_sm_ah *new_ah, *old_ah;
369 	struct ib_port_attr port_attr;
370 	struct ib_ah_attr   ah_attr;
371 
372 	if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
373 		printk(KERN_WARNING "Couldn't query port\n");
374 		return;
375 	}
376 
377 	new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
378 	if (!new_ah) {
379 		printk(KERN_WARNING "Couldn't allocate new SM AH\n");
380 		return;
381 	}
382 
383 	kref_init(&new_ah->ref);
384 	new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
385 
386 	new_ah->pkey_index = 0;
387 	if (ib_find_pkey(port->agent->device, port->port_num,
388 			 IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index) &&
389 	    ib_find_pkey(port->agent->device, port->port_num,
390 			 IB_DEFAULT_PKEY_PARTIAL, &new_ah->pkey_index))
391 		printk(KERN_ERR "Couldn't find index for default PKey\n");
392 
393 	memset(&ah_attr, 0, sizeof ah_attr);
394 	ah_attr.dlid     = port_attr.sm_lid;
395 	ah_attr.sl       = port_attr.sm_sl;
396 	ah_attr.port_num = port->port_num;
397 
398 	new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
399 	if (IS_ERR(new_ah->ah)) {
400 		printk(KERN_WARNING "Couldn't create new SM AH\n");
401 		kfree(new_ah);
402 		return;
403 	}
404 
405 	spin_lock_irq(&port->ah_lock);
406 	old_ah = port->sm_ah;
407 	port->sm_ah = new_ah;
408 	spin_unlock_irq(&port->ah_lock);
409 
410 	if (old_ah)
411 		kref_put(&old_ah->ref, free_sm_ah);
412 }
413 
414 static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
415 {
416 	if (event->event == IB_EVENT_PORT_ERR    ||
417 	    event->event == IB_EVENT_PORT_ACTIVE ||
418 	    event->event == IB_EVENT_LID_CHANGE  ||
419 	    event->event == IB_EVENT_PKEY_CHANGE ||
420 	    event->event == IB_EVENT_SM_CHANGE   ||
421 	    event->event == IB_EVENT_CLIENT_REREGISTER) {
422 		struct ib_sa_device *sa_dev;
423 		sa_dev = container_of(handler, typeof(*sa_dev), event_handler);
424 
425 		schedule_work(&sa_dev->port[event->element.port_num -
426 					    sa_dev->start_port].update_task);
427 	}
428 }
429 
430 void ib_sa_register_client(struct ib_sa_client *client)
431 {
432 	atomic_set(&client->users, 1);
433 	init_completion(&client->comp);
434 }
435 EXPORT_SYMBOL(ib_sa_register_client);
436 
437 void ib_sa_unregister_client(struct ib_sa_client *client)
438 {
439 	ib_sa_client_put(client);
440 	wait_for_completion(&client->comp);
441 }
442 EXPORT_SYMBOL(ib_sa_unregister_client);
443 
444 /**
445  * ib_sa_cancel_query - try to cancel an SA query
446  * @id:ID of query to cancel
447  * @query:query pointer to cancel
448  *
449  * Try to cancel an SA query.  If the id and query don't match up or
450  * the query has already completed, nothing is done.  Otherwise the
451  * query is canceled and will complete with a status of -EINTR.
452  */
453 void ib_sa_cancel_query(int id, struct ib_sa_query *query)
454 {
455 	unsigned long flags;
456 	struct ib_mad_agent *agent;
457 	struct ib_mad_send_buf *mad_buf;
458 
459 	spin_lock_irqsave(&idr_lock, flags);
460 	if (idr_find(&query_idr, id) != query) {
461 		spin_unlock_irqrestore(&idr_lock, flags);
462 		return;
463 	}
464 	agent = query->port->agent;
465 	mad_buf = query->mad_buf;
466 	spin_unlock_irqrestore(&idr_lock, flags);
467 
468 	ib_cancel_mad(agent, mad_buf);
469 }
470 EXPORT_SYMBOL(ib_sa_cancel_query);
471 
472 static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
473 {
474 	struct ib_sa_device *sa_dev;
475 	struct ib_sa_port   *port;
476 	unsigned long flags;
477 	u8 src_path_mask;
478 
479 	sa_dev = ib_get_client_data(device, &sa_client);
480 	if (!sa_dev)
481 		return 0x7f;
482 
483 	port  = &sa_dev->port[port_num - sa_dev->start_port];
484 	spin_lock_irqsave(&port->ah_lock, flags);
485 	src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
486 	spin_unlock_irqrestore(&port->ah_lock, flags);
487 
488 	return src_path_mask;
489 }
490 
491 int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
492 			 struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
493 {
494 	int ret;
495 	u16 gid_index;
496 
497 	memset(ah_attr, 0, sizeof *ah_attr);
498 	ah_attr->dlid = be16_to_cpu(rec->dlid);
499 	ah_attr->sl = rec->sl;
500 	ah_attr->src_path_bits = be16_to_cpu(rec->slid) &
501 				 get_src_path_mask(device, port_num);
502 	ah_attr->port_num = port_num;
503 	ah_attr->static_rate = rec->rate;
504 
505 	if (rec->hop_limit > 1) {
506 		ah_attr->ah_flags = IB_AH_GRH;
507 		ah_attr->grh.dgid = rec->dgid;
508 
509 		ret = ib_find_cached_gid(device, &rec->sgid, &port_num,
510 					 &gid_index);
511 		if (ret)
512 			return ret;
513 
514 		ah_attr->grh.sgid_index    = gid_index;
515 		ah_attr->grh.flow_label    = be32_to_cpu(rec->flow_label);
516 		ah_attr->grh.hop_limit     = rec->hop_limit;
517 		ah_attr->grh.traffic_class = rec->traffic_class;
518 	}
519 	return 0;
520 }
521 EXPORT_SYMBOL(ib_init_ah_from_path);
522 
523 static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
524 {
525 	unsigned long flags;
526 
527 	spin_lock_irqsave(&query->port->ah_lock, flags);
528 	kref_get(&query->port->sm_ah->ref);
529 	query->sm_ah = query->port->sm_ah;
530 	spin_unlock_irqrestore(&query->port->ah_lock, flags);
531 
532 	query->mad_buf = ib_create_send_mad(query->port->agent, 1,
533 					    query->sm_ah->pkey_index,
534 					    0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
535 					    gfp_mask);
536 	if (!query->mad_buf) {
537 		kref_put(&query->sm_ah->ref, free_sm_ah);
538 		return -ENOMEM;
539 	}
540 
541 	query->mad_buf->ah = query->sm_ah->ah;
542 
543 	return 0;
544 }
545 
546 static void free_mad(struct ib_sa_query *query)
547 {
548 	ib_free_send_mad(query->mad_buf);
549 	kref_put(&query->sm_ah->ref, free_sm_ah);
550 }
551 
552 static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
553 {
554 	unsigned long flags;
555 
556 	memset(mad, 0, sizeof *mad);
557 
558 	mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
559 	mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_SUBN_ADM;
560 	mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
561 
562 	spin_lock_irqsave(&tid_lock, flags);
563 	mad->mad_hdr.tid           =
564 		cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
565 	spin_unlock_irqrestore(&tid_lock, flags);
566 }
567 
568 static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
569 {
570 	unsigned long flags;
571 	int ret, id;
572 
573 retry:
574 	if (!idr_pre_get(&query_idr, gfp_mask))
575 		return -ENOMEM;
576 	spin_lock_irqsave(&idr_lock, flags);
577 	ret = idr_get_new(&query_idr, query, &id);
578 	spin_unlock_irqrestore(&idr_lock, flags);
579 	if (ret == -EAGAIN)
580 		goto retry;
581 	if (ret)
582 		return ret;
583 
584 	query->mad_buf->timeout_ms  = timeout_ms;
585 	query->mad_buf->context[0] = query;
586 	query->id = id;
587 
588 	ret = ib_post_send_mad(query->mad_buf, NULL);
589 	if (ret) {
590 		spin_lock_irqsave(&idr_lock, flags);
591 		idr_remove(&query_idr, id);
592 		spin_unlock_irqrestore(&idr_lock, flags);
593 	}
594 
595 	/*
596 	 * It's not safe to dereference query any more, because the
597 	 * send may already have completed and freed the query in
598 	 * another context.
599 	 */
600 	return ret ? ret : id;
601 }
602 
603 static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
604 				    int status,
605 				    struct ib_sa_mad *mad)
606 {
607 	struct ib_sa_path_query *query =
608 		container_of(sa_query, struct ib_sa_path_query, sa_query);
609 
610 	if (mad) {
611 		struct ib_sa_path_rec rec;
612 
613 		ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
614 			  mad->data, &rec);
615 		query->callback(status, &rec, query->context);
616 	} else
617 		query->callback(status, NULL, query->context);
618 }
619 
620 static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
621 {
622 	kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
623 }
624 
625 /**
626  * ib_sa_path_rec_get - Start a Path get query
627  * @client:SA client
628  * @device:device to send query on
629  * @port_num: port number to send query on
630  * @rec:Path Record to send in query
631  * @comp_mask:component mask to send in query
632  * @timeout_ms:time to wait for response
633  * @gfp_mask:GFP mask to use for internal allocations
634  * @callback:function called when query completes, times out or is
635  * canceled
636  * @context:opaque user context passed to callback
637  * @sa_query:query context, used to cancel query
638  *
639  * Send a Path Record Get query to the SA to look up a path.  The
640  * callback function will be called when the query completes (or
641  * fails); status is 0 for a successful response, -EINTR if the query
642  * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
643  * occurred sending the query.  The resp parameter of the callback is
644  * only valid if status is 0.
645  *
646  * If the return value of ib_sa_path_rec_get() is negative, it is an
647  * error code.  Otherwise it is a query ID that can be used to cancel
648  * the query.
649  */
650 int ib_sa_path_rec_get(struct ib_sa_client *client,
651 		       struct ib_device *device, u8 port_num,
652 		       struct ib_sa_path_rec *rec,
653 		       ib_sa_comp_mask comp_mask,
654 		       int timeout_ms, gfp_t gfp_mask,
655 		       void (*callback)(int status,
656 					struct ib_sa_path_rec *resp,
657 					void *context),
658 		       void *context,
659 		       struct ib_sa_query **sa_query)
660 {
661 	struct ib_sa_path_query *query;
662 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
663 	struct ib_sa_port   *port;
664 	struct ib_mad_agent *agent;
665 	struct ib_sa_mad *mad;
666 	int ret;
667 
668 	if (!sa_dev)
669 		return -ENODEV;
670 
671 	port  = &sa_dev->port[port_num - sa_dev->start_port];
672 	agent = port->agent;
673 
674 	query = kmalloc(sizeof *query, gfp_mask);
675 	if (!query)
676 		return -ENOMEM;
677 
678 	query->sa_query.port     = port;
679 	ret = alloc_mad(&query->sa_query, gfp_mask);
680 	if (ret)
681 		goto err1;
682 
683 	ib_sa_client_get(client);
684 	query->sa_query.client = client;
685 	query->callback        = callback;
686 	query->context         = context;
687 
688 	mad = query->sa_query.mad_buf->mad;
689 	init_mad(mad, agent);
690 
691 	query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
692 	query->sa_query.release  = ib_sa_path_rec_release;
693 	mad->mad_hdr.method	 = IB_MGMT_METHOD_GET;
694 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_PATH_REC);
695 	mad->sa_hdr.comp_mask	 = comp_mask;
696 
697 	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);
698 
699 	*sa_query = &query->sa_query;
700 
701 	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
702 	if (ret < 0)
703 		goto err2;
704 
705 	return ret;
706 
707 err2:
708 	*sa_query = NULL;
709 	ib_sa_client_put(query->sa_query.client);
710 	free_mad(&query->sa_query);
711 
712 err1:
713 	kfree(query);
714 	return ret;
715 }
716 EXPORT_SYMBOL(ib_sa_path_rec_get);
717 
718 static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
719 				    int status,
720 				    struct ib_sa_mad *mad)
721 {
722 	struct ib_sa_service_query *query =
723 		container_of(sa_query, struct ib_sa_service_query, sa_query);
724 
725 	if (mad) {
726 		struct ib_sa_service_rec rec;
727 
728 		ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
729 			  mad->data, &rec);
730 		query->callback(status, &rec, query->context);
731 	} else
732 		query->callback(status, NULL, query->context);
733 }
734 
735 static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
736 {
737 	kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
738 }
739 
740 /**
741  * ib_sa_service_rec_query - Start Service Record operation
742  * @client:SA client
743  * @device:device to send request on
744  * @port_num: port number to send request on
745  * @method:SA method - should be get, set, or delete
746  * @rec:Service Record to send in request
747  * @comp_mask:component mask to send in request
748  * @timeout_ms:time to wait for response
749  * @gfp_mask:GFP mask to use for internal allocations
750  * @callback:function called when request completes, times out or is
751  * canceled
752  * @context:opaque user context passed to callback
753  * @sa_query:request context, used to cancel request
754  *
755  * Send a Service Record set/get/delete to the SA to register,
756  * unregister or query a service record.
757  * The callback function will be called when the request completes (or
758  * fails); status is 0 for a successful response, -EINTR if the query
759  * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
760  * occurred sending the query.  The resp parameter of the callback is
761  * only valid if status is 0.
762  *
763  * If the return value of ib_sa_service_rec_query() is negative, it is an
764  * error code.  Otherwise it is a request ID that can be used to cancel
765  * the query.
766  */
767 int ib_sa_service_rec_query(struct ib_sa_client *client,
768 			    struct ib_device *device, u8 port_num, u8 method,
769 			    struct ib_sa_service_rec *rec,
770 			    ib_sa_comp_mask comp_mask,
771 			    int timeout_ms, gfp_t gfp_mask,
772 			    void (*callback)(int status,
773 					     struct ib_sa_service_rec *resp,
774 					     void *context),
775 			    void *context,
776 			    struct ib_sa_query **sa_query)
777 {
778 	struct ib_sa_service_query *query;
779 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
780 	struct ib_sa_port   *port;
781 	struct ib_mad_agent *agent;
782 	struct ib_sa_mad *mad;
783 	int ret;
784 
785 	if (!sa_dev)
786 		return -ENODEV;
787 
788 	port  = &sa_dev->port[port_num - sa_dev->start_port];
789 	agent = port->agent;
790 
791 	if (method != IB_MGMT_METHOD_GET &&
792 	    method != IB_MGMT_METHOD_SET &&
793 	    method != IB_SA_METHOD_DELETE)
794 		return -EINVAL;
795 
796 	query = kmalloc(sizeof *query, gfp_mask);
797 	if (!query)
798 		return -ENOMEM;
799 
800 	query->sa_query.port     = port;
801 	ret = alloc_mad(&query->sa_query, gfp_mask);
802 	if (ret)
803 		goto err1;
804 
805 	ib_sa_client_get(client);
806 	query->sa_query.client = client;
807 	query->callback        = callback;
808 	query->context         = context;
809 
810 	mad = query->sa_query.mad_buf->mad;
811 	init_mad(mad, agent);
812 
813 	query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
814 	query->sa_query.release  = ib_sa_service_rec_release;
815 	mad->mad_hdr.method	 = method;
816 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
817 	mad->sa_hdr.comp_mask	 = comp_mask;
818 
819 	ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
820 		rec, mad->data);
821 
822 	*sa_query = &query->sa_query;
823 
824 	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
825 	if (ret < 0)
826 		goto err2;
827 
828 	return ret;
829 
830 err2:
831 	*sa_query = NULL;
832 	ib_sa_client_put(query->sa_query.client);
833 	free_mad(&query->sa_query);
834 
835 err1:
836 	kfree(query);
837 	return ret;
838 }
839 EXPORT_SYMBOL(ib_sa_service_rec_query);
840 
841 static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
842 					int status,
843 					struct ib_sa_mad *mad)
844 {
845 	struct ib_sa_mcmember_query *query =
846 		container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
847 
848 	if (mad) {
849 		struct ib_sa_mcmember_rec rec;
850 
851 		ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
852 			  mad->data, &rec);
853 		query->callback(status, &rec, query->context);
854 	} else
855 		query->callback(status, NULL, query->context);
856 }
857 
858 static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
859 {
860 	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
861 }
862 
863 int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
864 			     struct ib_device *device, u8 port_num,
865 			     u8 method,
866 			     struct ib_sa_mcmember_rec *rec,
867 			     ib_sa_comp_mask comp_mask,
868 			     int timeout_ms, gfp_t gfp_mask,
869 			     void (*callback)(int status,
870 					      struct ib_sa_mcmember_rec *resp,
871 					      void *context),
872 			     void *context,
873 			     struct ib_sa_query **sa_query)
874 {
875 	struct ib_sa_mcmember_query *query;
876 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
877 	struct ib_sa_port   *port;
878 	struct ib_mad_agent *agent;
879 	struct ib_sa_mad *mad;
880 	int ret;
881 
882 	if (!sa_dev)
883 		return -ENODEV;
884 
885 	port  = &sa_dev->port[port_num - sa_dev->start_port];
886 	agent = port->agent;
887 
888 	query = kmalloc(sizeof *query, gfp_mask);
889 	if (!query)
890 		return -ENOMEM;
891 
892 	query->sa_query.port     = port;
893 	ret = alloc_mad(&query->sa_query, gfp_mask);
894 	if (ret)
895 		goto err1;
896 
897 	ib_sa_client_get(client);
898 	query->sa_query.client = client;
899 	query->callback        = callback;
900 	query->context         = context;
901 
902 	mad = query->sa_query.mad_buf->mad;
903 	init_mad(mad, agent);
904 
905 	query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
906 	query->sa_query.release  = ib_sa_mcmember_rec_release;
907 	mad->mad_hdr.method	 = method;
908 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
909 	mad->sa_hdr.comp_mask	 = comp_mask;
910 
911 	ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
912 		rec, mad->data);
913 
914 	*sa_query = &query->sa_query;
915 
916 	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
917 	if (ret < 0)
918 		goto err2;
919 
920 	return ret;
921 
922 err2:
923 	*sa_query = NULL;
924 	ib_sa_client_put(query->sa_query.client);
925 	free_mad(&query->sa_query);
926 
927 err1:
928 	kfree(query);
929 	return ret;
930 }
931 
932 static void send_handler(struct ib_mad_agent *agent,
933 			 struct ib_mad_send_wc *mad_send_wc)
934 {
935 	struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
936 	unsigned long flags;
937 
938 	if (query->callback)
939 		switch (mad_send_wc->status) {
940 		case IB_WC_SUCCESS:
941 			/* No callback -- already got recv */
942 			break;
943 		case IB_WC_RESP_TIMEOUT_ERR:
944 			query->callback(query, -ETIMEDOUT, NULL);
945 			break;
946 		case IB_WC_WR_FLUSH_ERR:
947 			query->callback(query, -EINTR, NULL);
948 			break;
949 		default:
950 			query->callback(query, -EIO, NULL);
951 			break;
952 		}
953 
954 	spin_lock_irqsave(&idr_lock, flags);
955 	idr_remove(&query_idr, query->id);
956 	spin_unlock_irqrestore(&idr_lock, flags);
957 
958 	free_mad(query);
959 	ib_sa_client_put(query->client);
960 	query->release(query);
961 }
962 
963 static void recv_handler(struct ib_mad_agent *mad_agent,
964 			 struct ib_mad_recv_wc *mad_recv_wc)
965 {
966 	struct ib_sa_query *query;
967 	struct ib_mad_send_buf *mad_buf;
968 
969 	mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
970 	query = mad_buf->context[0];
971 
972 	if (query->callback) {
973 		if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
974 			query->callback(query,
975 					mad_recv_wc->recv_buf.mad->mad_hdr.status ?
976 					-EINVAL : 0,
977 					(struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
978 		else
979 			query->callback(query, -EIO, NULL);
980 	}
981 
982 	ib_free_recv_mad(mad_recv_wc);
983 }
984 
985 static void ib_sa_add_one(struct ib_device *device)
986 {
987 	struct ib_sa_device *sa_dev;
988 	int s, e, i;
989 
990 	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
991 		return;
992 
993 	if (device->node_type == RDMA_NODE_IB_SWITCH)
994 		s = e = 0;
995 	else {
996 		s = 1;
997 		e = device->phys_port_cnt;
998 	}
999 
1000 	sa_dev = kmalloc(sizeof *sa_dev +
1001 			 (e - s + 1) * sizeof (struct ib_sa_port),
1002 			 GFP_KERNEL);
1003 	if (!sa_dev)
1004 		return;
1005 
1006 	sa_dev->start_port = s;
1007 	sa_dev->end_port   = e;
1008 
1009 	for (i = 0; i <= e - s; ++i) {
1010 		sa_dev->port[i].sm_ah    = NULL;
1011 		sa_dev->port[i].port_num = i + s;
1012 		spin_lock_init(&sa_dev->port[i].ah_lock);
1013 
1014 		sa_dev->port[i].agent =
1015 			ib_register_mad_agent(device, i + s, IB_QPT_GSI,
1016 					      NULL, 0, send_handler,
1017 					      recv_handler, sa_dev);
1018 		if (IS_ERR(sa_dev->port[i].agent))
1019 			goto err;
1020 
1021 		INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
1022 	}
1023 
1024 	ib_set_client_data(device, &sa_client, sa_dev);
1025 
1026 	/*
1027 	 * We register our event handler after everything is set up,
1028 	 * and then update our cached info after the event handler is
1029 	 * registered to avoid any problems if a port changes state
1030 	 * during our initialization.
1031 	 */
1032 
1033 	INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
1034 	if (ib_register_event_handler(&sa_dev->event_handler))
1035 		goto err;
1036 
1037 	for (i = 0; i <= e - s; ++i)
1038 		update_sm_ah(&sa_dev->port[i].update_task);
1039 
1040 	return;
1041 
1042 err:
1043 	while (--i >= 0)
1044 		ib_unregister_mad_agent(sa_dev->port[i].agent);
1045 
1046 	kfree(sa_dev);
1047 
1048 	return;
1049 }
1050 
1051 static void ib_sa_remove_one(struct ib_device *device)
1052 {
1053 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1054 	int i;
1055 
1056 	if (!sa_dev)
1057 		return;
1058 
1059 	ib_unregister_event_handler(&sa_dev->event_handler);
1060 
1061 	flush_scheduled_work();
1062 
1063 	for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
1064 		ib_unregister_mad_agent(sa_dev->port[i].agent);
1065 		kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
1066 	}
1067 
1068 	kfree(sa_dev);
1069 }
1070 
1071 static int __init ib_sa_init(void)
1072 {
1073 	int ret;
1074 
1075 	spin_lock_init(&idr_lock);
1076 	spin_lock_init(&tid_lock);
1077 
1078 	get_random_bytes(&tid, sizeof tid);
1079 
1080 	ret = ib_register_client(&sa_client);
1081 	if (ret) {
1082 		printk(KERN_ERR "Couldn't register ib_sa client\n");
1083 		goto err1;
1084 	}
1085 
1086 	ret = mcast_init();
1087 	if (ret) {
1088 		printk(KERN_ERR "Couldn't initialize multicast handling\n");
1089 		goto err2;
1090 	}
1091 
1092 	return 0;
1093 err2:
1094 	ib_unregister_client(&sa_client);
1095 err1:
1096 	return ret;
1097 }
1098 
1099 static void __exit ib_sa_cleanup(void)
1100 {
1101 	mcast_cleanup();
1102 	ib_unregister_client(&sa_client);
1103 	idr_destroy(&query_idr);
1104 }
1105 
/* Module entry and exit points. */
module_init(ib_sa_init);
module_exit(ib_sa_cleanup);
1108