xref: /openbmc/linux/drivers/infiniband/core/sa_query.c (revision a1c7c49c2091926962f8c1c866d386febffec5d8)
1 /*
2  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
4  * Copyright (c) 2006 Intel Corporation.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/err.h>
38 #include <linux/random.h>
39 #include <linux/spinlock.h>
40 #include <linux/slab.h>
41 #include <linux/dma-mapping.h>
42 #include <linux/kref.h>
43 #include <linux/xarray.h>
44 #include <linux/workqueue.h>
45 #include <uapi/linux/if_ether.h>
46 #include <rdma/ib_pack.h>
47 #include <rdma/ib_cache.h>
48 #include <rdma/rdma_netlink.h>
49 #include <net/netlink.h>
50 #include <uapi/rdma/ib_user_sa.h>
51 #include <rdma/ib_marshall.h>
52 #include <rdma/ib_addr.h>
53 #include <rdma/opa_addr.h>
54 #include "sa.h"
55 #include "core_priv.h"
56 
/*
 * Tunables for the netlink local-service timeout and for ClassPortInfo
 * retries.  The local-service values are in milliseconds:
 * sa_local_svc_timeout_ms is converted with msecs_to_jiffies() when a request
 * is queued, and ib_nl_handle_set_timeout() clamps new values to
 * [IB_SA_LOCAL_SVC_TIMEOUT_MIN, IB_SA_LOCAL_SVC_TIMEOUT_MAX].
 */
#define IB_SA_LOCAL_SVC_TIMEOUT_MIN		100
#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT		2000
#define IB_SA_LOCAL_SVC_TIMEOUT_MAX		200000
#define IB_SA_CPI_MAX_RETRY_CNT			3
#define IB_SA_CPI_RETRY_WAIT			1000 /*msecs */
/* Current local-service timeout; updated under ib_nl_request_lock. */
static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;
63 
/*
 * Reference-counted wrapper around an address handle.
 * NOTE(review): presumably the handle used to reach the subnet manager / SA;
 * the code that creates and uses it is outside this chunk.
 */
struct ib_sa_sm_ah {
	struct ib_ah        *ah;
	struct kref          ref;		/* lifetime of this wrapper */
	u16		     pkey_index;
	u8		     src_path_mask;
};
70 
/* Selects which member of the union in struct rdma_class_port_info is used. */
enum rdma_class_port_info_type {
	RDMA_CLASS_PORT_INFO_IB,
	RDMA_CLASS_PORT_INFO_OPA
};
75 
/* ClassPortInfo in either IB or OPA layout, discriminated by @type. */
struct rdma_class_port_info {
	enum rdma_class_port_info_type type;
	union {
		struct ib_class_port_info ib;
		struct opa_class_port_info opa;
	};
};
83 
/*
 * Per-port cached ClassPortInfo.
 * NOTE(review): @valid / @retry_cnt are maintained by code outside this
 * chunk; presumably @retry_cnt is bounded by IB_SA_CPI_MAX_RETRY_CNT.
 */
struct ib_sa_classport_cache {
	bool valid;
	int retry_cnt;
	struct rdma_class_port_info data;
};
89 
/*
 * Per-port SA state.  Queries reach their device through @agent
 * (query->port->agent->device) and report @port_num in netlink requests.
 */
struct ib_sa_port {
	struct ib_mad_agent *agent;
	struct ib_sa_sm_ah  *sm_ah;
	struct work_struct   update_task;
	struct ib_sa_classport_cache classport_info;
	struct delayed_work ib_cpi_work;
	spinlock_t                   classport_lock; /* protects class port info set */
	spinlock_t           ah_lock;	/* NOTE(review): presumably protects sm_ah - confirm */
	u32		     port_num;
};
100 
/*
 * Per-device SA state with one ib_sa_port per port in the trailing
 * flexible array.
 * NOTE(review): presumably port[] is indexed relative to start_port - confirm.
 */
struct ib_sa_device {
	int                     start_port, end_port;
	struct ib_event_handler event_handler;
	struct ib_sa_port port[];
};
106 
/*
 * Common state of one SA query.  Type-specific query structures embed this
 * as their sa_query member.
 *
 * @callback is invoked as callback(query, status, mad) when a result is
 * available.  @flags holds IB_SA_* bits.  @list links the query into
 * ib_nl_request_list while a netlink request is outstanding, @seq is the
 * netlink sequence number matched against the response, and @timeout is the
 * absolute jiffies deadline of that request.
 */
struct ib_sa_query {
	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
	void (*release)(struct ib_sa_query *);
	struct ib_sa_client    *client;
	struct ib_sa_port      *port;
	struct ib_mad_send_buf *mad_buf;
	struct ib_sa_sm_ah     *sm_ah;
	int			id;
	u32			flags;
	struct list_head	list; /* Local svc request list */
	u32			seq; /* Local svc request sequence number */
	unsigned long		timeout; /* Local svc timeout */
	u8			path_use; /* How will the pathrecord be used */
};
121 
/*
 * Bits stored in ib_sa_query.flags.  IB_SA_ENABLE_LOCAL_SERVICE is cleared by
 * ib_sa_disable_local_svc(); IB_SA_CANCEL is set by ib_nl_cancel_request()
 * and tested by ib_sa_query_cancelled().
 */
#define IB_SA_ENABLE_LOCAL_SERVICE	0x00000001
#define IB_SA_CANCEL			0x00000002
#define IB_SA_QUERY_OPA			0x00000004
125 
/*
 * Path-record query: user callback and context plus the embedded generic
 * query.
 * NOTE(review): @conv_pr looks like scratch space for a converted path
 * record; its use is outside this chunk.
 */
struct ib_sa_path_query {
	void (*callback)(int, struct sa_path_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
	struct sa_path_rec *conv_pr;
};
132 
/* GUIDInfo query: user callback and context plus the embedded generic query. */
struct ib_sa_guidinfo_query {
	void (*callback)(int, struct ib_sa_guidinfo_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
};
138 
/*
 * ClassPortInfo query: user callback (taking only the context pointer) plus
 * the embedded generic query.
 */
struct ib_sa_classport_info_query {
	void (*callback)(void *);
	void *context;
	struct ib_sa_query sa_query;
};
144 
/* MCMember query: user callback and context plus the embedded generic query. */
struct ib_sa_mcmember_query {
	void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
};
150 
/* Queries with an outstanding netlink request, linked via ib_sa_query.list. */
static LIST_HEAD(ib_nl_request_list);
/* Protects ib_nl_request_list and the timeout bookkeeping of its entries. */
static DEFINE_SPINLOCK(ib_nl_request_lock);
/* Source of ib_sa_query.seq values. */
static atomic_t ib_nl_sa_request_seq;
/* Workqueue and delayed work that expire overdue netlink requests. */
static struct workqueue_struct *ib_nl_wq;
static struct delayed_work ib_nl_timed_work;
/* Attribute policy used when parsing local-service netlink messages. */
static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = {
	[LS_NLA_TYPE_PATH_RECORD]	= {.type = NLA_BINARY,
		.len = sizeof(struct ib_path_rec_data)},
	[LS_NLA_TYPE_TIMEOUT]		= {.type = NLA_U32},
	[LS_NLA_TYPE_SERVICE_ID]	= {.type = NLA_U64},
	[LS_NLA_TYPE_DGID]		= {.type = NLA_BINARY,
		.len = sizeof(struct rdma_nla_ls_gid)},
	[LS_NLA_TYPE_SGID]		= {.type = NLA_BINARY,
		.len = sizeof(struct rdma_nla_ls_gid)},
	[LS_NLA_TYPE_TCLASS]		= {.type = NLA_U8},
	[LS_NLA_TYPE_PKEY]		= {.type = NLA_U16},
	[LS_NLA_TYPE_QOS_CLASS]		= {.type = NLA_U16},
};
169 
170 
/* Device add/remove callbacks; defined later in the file. */
static int ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device, void *client_data);

/* ib_client descriptor wiring the "sa" client to the callbacks above. */
static struct ib_client sa_client = {
	.name   = "sa",
	.add    = ib_sa_add_one,
	.remove = ib_sa_remove_one
};
179 
/*
 * Allocating XArray whose internal lock is taken with interrupts disabled.
 * NOTE(review): presumably maps ib_sa_query.id to the query - confirm against
 * the users later in the file.
 */
static DEFINE_XARRAY_FLAGS(queries, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);

/*
 * NOTE(review): tid looks like a transaction-ID counter guarded by tid_lock;
 * its users are outside this chunk.
 */
static DEFINE_SPINLOCK(tid_lock);
static u32 tid;
184 
/*
 * PATH_REC_FIELD() fills in the in-memory side of an ib_field entry: the
 * offset and size of @field inside struct sa_path_rec, plus a printable name.
 */
#define PATH_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct sa_path_rec, field),	\
	.struct_size_bytes   = sizeof_field(struct sa_path_rec, field),	\
	.field_name          = "sa_path_rec:" #field

/*
 * Field layout of the IB path record.  Each entry pairs a struct sa_path_rec
 * member with its position in the packed record; offset_words counts 32-bit
 * words (the 64-bit service_id at word 0 is followed by dgid at word 2).
 * NOTE(review): presumably consumed by ib_pack()/ib_unpack(); the callers are
 * outside this chunk.
 */
static const struct ib_field path_rec_table[] = {
	{ PATH_REC_FIELD(service_id),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 64 },
	{ PATH_REC_FIELD(dgid),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ PATH_REC_FIELD(sgid),
	  .offset_words = 6,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ PATH_REC_FIELD(ib.dlid),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ PATH_REC_FIELD(ib.slid),
	  .offset_words = 10,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ PATH_REC_FIELD(ib.raw_traffic),
	  .offset_words = 11,
	  .offset_bits  = 0,
	  .size_bits    = 1 },
	{ RESERVED,
	  .offset_words = 11,
	  .offset_bits  = 1,
	  .size_bits    = 3 },
	{ PATH_REC_FIELD(flow_label),
	  .offset_words = 11,
	  .offset_bits  = 4,
	  .size_bits    = 20 },
	{ PATH_REC_FIELD(hop_limit),
	  .offset_words = 11,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ PATH_REC_FIELD(traffic_class),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 8 },
	{ PATH_REC_FIELD(reversible),
	  .offset_words = 12,
	  .offset_bits  = 8,
	  .size_bits    = 1 },
	{ PATH_REC_FIELD(numb_path),
	  .offset_words = 12,
	  .offset_bits  = 9,
	  .size_bits    = 7 },
	{ PATH_REC_FIELD(pkey),
	  .offset_words = 12,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ PATH_REC_FIELD(qos_class),
	  .offset_words = 13,
	  .offset_bits  = 0,
	  .size_bits    = 12 },
	{ PATH_REC_FIELD(sl),
	  .offset_words = 13,
	  .offset_bits  = 12,
	  .size_bits    = 4 },
	{ PATH_REC_FIELD(mtu_selector),
	  .offset_words = 13,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(mtu),
	  .offset_words = 13,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(rate_selector),
	  .offset_words = 13,
	  .offset_bits  = 24,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(rate),
	  .offset_words = 13,
	  .offset_bits  = 26,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(packet_life_time_selector),
	  .offset_words = 14,
	  .offset_bits  = 0,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(packet_life_time),
	  .offset_words = 14,
	  .offset_bits  = 2,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(preference),
	  .offset_words = 14,
	  .offset_bits  = 8,
	  .size_bits    = 8 },
	{ RESERVED,
	  .offset_words = 14,
	  .offset_bits  = 16,
	  .size_bits    = 48 },
};
284 
/*
 * OPA_PATH_REC_FIELD() fills in the in-memory side of an ib_field entry for
 * the OPA layout; it resolves offsets in struct sa_path_rec exactly like
 * PATH_REC_FIELD().
 */
#define OPA_PATH_REC_FIELD(field) \
	.struct_offset_bytes = \
		offsetof(struct sa_path_rec, field), \
	.struct_size_bytes   = \
		sizeof_field(struct sa_path_rec, field),	\
	.field_name          = "sa_path_rec:" #field

/*
 * Field layout of the OPA path record.  It differs from path_rec_table in
 * using 32-bit opa.dlid/opa.slid, which shifts the later fields by one word,
 * and in adding the opa.l2_* and opa.qos_* fields.
 */
static const struct ib_field opa_path_rec_table[] = {
	{ OPA_PATH_REC_FIELD(service_id),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 64 },
	{ OPA_PATH_REC_FIELD(dgid),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ OPA_PATH_REC_FIELD(sgid),
	  .offset_words = 6,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ OPA_PATH_REC_FIELD(opa.dlid),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ OPA_PATH_REC_FIELD(opa.slid),
	  .offset_words = 11,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ OPA_PATH_REC_FIELD(opa.raw_traffic),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 1 },
	{ RESERVED,
	  .offset_words = 12,
	  .offset_bits  = 1,
	  .size_bits    = 3 },
	{ OPA_PATH_REC_FIELD(flow_label),
	  .offset_words = 12,
	  .offset_bits  = 4,
	  .size_bits    = 20 },
	{ OPA_PATH_REC_FIELD(hop_limit),
	  .offset_words = 12,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ OPA_PATH_REC_FIELD(traffic_class),
	  .offset_words = 13,
	  .offset_bits  = 0,
	  .size_bits    = 8 },
	{ OPA_PATH_REC_FIELD(reversible),
	  .offset_words = 13,
	  .offset_bits  = 8,
	  .size_bits    = 1 },
	{ OPA_PATH_REC_FIELD(numb_path),
	  .offset_words = 13,
	  .offset_bits  = 9,
	  .size_bits    = 7 },
	{ OPA_PATH_REC_FIELD(pkey),
	  .offset_words = 13,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ OPA_PATH_REC_FIELD(opa.l2_8B),
	  .offset_words = 14,
	  .offset_bits  = 0,
	  .size_bits    = 1 },
	{ OPA_PATH_REC_FIELD(opa.l2_10B),
	  .offset_words = 14,
	  .offset_bits  = 1,
	  .size_bits    = 1 },
	{ OPA_PATH_REC_FIELD(opa.l2_9B),
	  .offset_words = 14,
	  .offset_bits  = 2,
	  .size_bits    = 1 },
	{ OPA_PATH_REC_FIELD(opa.l2_16B),
	  .offset_words = 14,
	  .offset_bits  = 3,
	  .size_bits    = 1 },
	{ RESERVED,
	  .offset_words = 14,
	  .offset_bits  = 4,
	  .size_bits    = 2 },
	{ OPA_PATH_REC_FIELD(opa.qos_type),
	  .offset_words = 14,
	  .offset_bits  = 6,
	  .size_bits    = 2 },
	{ OPA_PATH_REC_FIELD(opa.qos_priority),
	  .offset_words = 14,
	  .offset_bits  = 8,
	  .size_bits    = 8 },
	{ RESERVED,
	  .offset_words = 14,
	  .offset_bits  = 16,
	  .size_bits    = 3 },
	{ OPA_PATH_REC_FIELD(sl),
	  .offset_words = 14,
	  .offset_bits  = 19,
	  .size_bits    = 5 },
	{ RESERVED,
	  .offset_words = 14,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ OPA_PATH_REC_FIELD(mtu_selector),
	  .offset_words = 15,
	  .offset_bits  = 0,
	  .size_bits    = 2 },
	{ OPA_PATH_REC_FIELD(mtu),
	  .offset_words = 15,
	  .offset_bits  = 2,
	  .size_bits    = 6 },
	{ OPA_PATH_REC_FIELD(rate_selector),
	  .offset_words = 15,
	  .offset_bits  = 8,
	  .size_bits    = 2 },
	{ OPA_PATH_REC_FIELD(rate),
	  .offset_words = 15,
	  .offset_bits  = 10,
	  .size_bits    = 6 },
	{ OPA_PATH_REC_FIELD(packet_life_time_selector),
	  .offset_words = 15,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ OPA_PATH_REC_FIELD(packet_life_time),
	  .offset_words = 15,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ OPA_PATH_REC_FIELD(preference),
	  .offset_words = 15,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
};
414 
/*
 * MCMEMBER_REC_FIELD() fills in the in-memory side of an ib_field entry: the
 * offset and size of @field inside struct ib_sa_mcmember_rec, plus a
 * printable name.
 */
#define MCMEMBER_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field),	\
	.struct_size_bytes   = sizeof_field(struct ib_sa_mcmember_rec, field),	\
	.field_name          = "sa_mcmember_rec:" #field

/*
 * Field layout of the MCMember record: each entry pairs a struct
 * ib_sa_mcmember_rec member with its word/bit position and width in the
 * packed record.
 */
static const struct ib_field mcmember_rec_table[] = {
	{ MCMEMBER_REC_FIELD(mgid),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ MCMEMBER_REC_FIELD(port_gid),
	  .offset_words = 4,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ MCMEMBER_REC_FIELD(qkey),
	  .offset_words = 8,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ MCMEMBER_REC_FIELD(mlid),
	  .offset_words = 9,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ MCMEMBER_REC_FIELD(mtu_selector),
	  .offset_words = 9,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(mtu),
	  .offset_words = 9,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(traffic_class),
	  .offset_words = 9,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ MCMEMBER_REC_FIELD(pkey),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ MCMEMBER_REC_FIELD(rate_selector),
	  .offset_words = 10,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(rate),
	  .offset_words = 10,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(packet_life_time_selector),
	  .offset_words = 10,
	  .offset_bits  = 24,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(packet_life_time),
	  .offset_words = 10,
	  .offset_bits  = 26,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(sl),
	  .offset_words = 11,
	  .offset_bits  = 0,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(flow_label),
	  .offset_words = 11,
	  .offset_bits  = 4,
	  .size_bits    = 20 },
	{ MCMEMBER_REC_FIELD(hop_limit),
	  .offset_words = 11,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ MCMEMBER_REC_FIELD(scope),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(join_state),
	  .offset_words = 12,
	  .offset_bits  = 4,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(proxy_join),
	  .offset_words = 12,
	  .offset_bits  = 8,
	  .size_bits    = 1 },
	{ RESERVED,
	  .offset_words = 12,
	  .offset_bits  = 9,
	  .size_bits    = 23 },
};
498 
/*
 * CLASSPORTINFO_REC_FIELD() fills in the in-memory side of an ib_field entry:
 * the offset and size of @field inside struct ib_class_port_info, plus a
 * printable name.
 */
#define CLASSPORTINFO_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_class_port_info, field),	\
	.struct_size_bytes   = sizeof_field(struct ib_class_port_info, field),	\
	.field_name          = "ib_class_port_info:" #field

/*
 * Field layout of the IB ClassPortInfo record: each entry pairs a struct
 * ib_class_port_info member with its word/bit position and width in the
 * packed record.
 */
static const struct ib_field ib_classport_info_rec_table[] = {
	{ CLASSPORTINFO_REC_FIELD(base_version),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 8 },
	{ CLASSPORTINFO_REC_FIELD(class_version),
	  .offset_words = 0,
	  .offset_bits  = 8,
	  .size_bits    = 8 },
	{ CLASSPORTINFO_REC_FIELD(capability_mask),
	  .offset_words = 0,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
	  .offset_words = 1,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ CLASSPORTINFO_REC_FIELD(redirect_gid),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ CLASSPORTINFO_REC_FIELD(redirect_tcslfl),
	  .offset_words = 6,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ CLASSPORTINFO_REC_FIELD(redirect_lid),
	  .offset_words = 7,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ CLASSPORTINFO_REC_FIELD(redirect_pkey),
	  .offset_words = 7,
	  .offset_bits  = 16,
	  .size_bits    = 16 },

	{ CLASSPORTINFO_REC_FIELD(redirect_qp),
	  .offset_words = 8,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ CLASSPORTINFO_REC_FIELD(redirect_qkey),
	  .offset_words = 9,
	  .offset_bits  = 0,
	  .size_bits    = 32 },

	{ CLASSPORTINFO_REC_FIELD(trap_gid),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ CLASSPORTINFO_REC_FIELD(trap_tcslfl),
	  .offset_words = 14,
	  .offset_bits  = 0,
	  .size_bits    = 32 },

	{ CLASSPORTINFO_REC_FIELD(trap_lid),
	  .offset_words = 15,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ CLASSPORTINFO_REC_FIELD(trap_pkey),
	  .offset_words = 15,
	  .offset_bits  = 16,
	  .size_bits    = 16 },

	{ CLASSPORTINFO_REC_FIELD(trap_hlqp),
	  .offset_words = 16,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ CLASSPORTINFO_REC_FIELD(trap_qkey),
	  .offset_words = 17,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
};
574 
/*
 * OPA_CLASSPORTINFO_REC_FIELD() fills in the in-memory side of an ib_field
 * entry: the offset and size of @field inside struct opa_class_port_info,
 * plus a printable name.
 */
#define OPA_CLASSPORTINFO_REC_FIELD(field) \
	.struct_offset_bytes =\
		offsetof(struct opa_class_port_info, field),	\
	.struct_size_bytes   = \
		sizeof_field(struct opa_class_port_info, field),	\
	.field_name          = "opa_class_port_info:" #field

/*
 * Field layout of the OPA ClassPortInfo record.  Unlike the IB table, the
 * LIDs are 32 bits wide and the two pkeys share word 18.
 */
static const struct ib_field opa_classport_info_rec_table[] = {
	{ OPA_CLASSPORTINFO_REC_FIELD(base_version),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 8 },
	{ OPA_CLASSPORTINFO_REC_FIELD(class_version),
	  .offset_words = 0,
	  .offset_bits  = 8,
	  .size_bits    = 8 },
	{ OPA_CLASSPORTINFO_REC_FIELD(cap_mask),
	  .offset_words = 0,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
	  .offset_words = 1,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_gid),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl),
	  .offset_words = 6,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_lid),
	  .offset_words = 7,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp),
	  .offset_words = 8,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey),
	  .offset_words = 9,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ OPA_CLASSPORTINFO_REC_FIELD(trap_gid),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl),
	  .offset_words = 14,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ OPA_CLASSPORTINFO_REC_FIELD(trap_lid),
	  .offset_words = 15,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp),
	  .offset_words = 16,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ OPA_CLASSPORTINFO_REC_FIELD(trap_qkey),
	  .offset_words = 17,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ OPA_CLASSPORTINFO_REC_FIELD(trap_pkey),
	  .offset_words = 18,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey),
	  .offset_words = 18,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd),
	  .offset_words = 19,
	  .offset_bits  = 0,
	  .size_bits    = 8 },
	{ RESERVED,
	  .offset_words = 19,
	  .offset_bits  = 8,
	  .size_bits    = 24 },
};
656 
/*
 * GUIDINFO_REC_FIELD() fills in the in-memory side of an ib_field entry: the
 * offset and size of @field inside struct ib_sa_guidinfo_rec, plus a
 * printable name.
 */
#define GUIDINFO_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field),	\
	.struct_size_bytes   = sizeof_field(struct ib_sa_guidinfo_rec, field),	\
	.field_name          = "sa_guidinfo_rec:" #field

/*
 * Field layout of the GUIDInfo record: a two-word header followed by a
 * 512-bit guid_info_list.
 */
static const struct ib_field guidinfo_rec_table[] = {
	{ GUIDINFO_REC_FIELD(lid),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ GUIDINFO_REC_FIELD(block_num),
	  .offset_words = 0,
	  .offset_bits  = 16,
	  .size_bits    = 8 },
	{ GUIDINFO_REC_FIELD(res1),
	  .offset_words = 0,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ GUIDINFO_REC_FIELD(res2),
	  .offset_words = 1,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ GUIDINFO_REC_FIELD(guid_info_list),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 512 },
};
684 
685 static inline void ib_sa_disable_local_svc(struct ib_sa_query *query)
686 {
687 	query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE;
688 }
689 
690 static inline int ib_sa_query_cancelled(struct ib_sa_query *query)
691 {
692 	return (query->flags & IB_SA_CANCEL);
693 }
694 
695 static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
696 				     struct ib_sa_query *query)
697 {
698 	struct sa_path_rec *sa_rec = query->mad_buf->context[1];
699 	struct ib_sa_mad *mad = query->mad_buf->mad;
700 	ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask;
701 	u16 val16;
702 	u64 val64;
703 	struct rdma_ls_resolve_header *header;
704 
705 	query->mad_buf->context[1] = NULL;
706 
707 	/* Construct the family header first */
708 	header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
709 	strscpy_pad(header->device_name,
710 		    dev_name(&query->port->agent->device->dev),
711 		    LS_DEVICE_NAME_MAX);
712 	header->port_num = query->port->port_num;
713 
714 	if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) &&
715 	    sa_rec->reversible != 0)
716 		query->path_use = LS_RESOLVE_PATH_USE_GMP;
717 	else
718 		query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL;
719 	header->path_use = query->path_use;
720 
721 	/* Now build the attributes */
722 	if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) {
723 		val64 = be64_to_cpu(sa_rec->service_id);
724 		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID,
725 			sizeof(val64), &val64);
726 	}
727 	if (comp_mask & IB_SA_PATH_REC_DGID)
728 		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_DGID,
729 			sizeof(sa_rec->dgid), &sa_rec->dgid);
730 	if (comp_mask & IB_SA_PATH_REC_SGID)
731 		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SGID,
732 			sizeof(sa_rec->sgid), &sa_rec->sgid);
733 	if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
734 		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_TCLASS,
735 			sizeof(sa_rec->traffic_class), &sa_rec->traffic_class);
736 
737 	if (comp_mask & IB_SA_PATH_REC_PKEY) {
738 		val16 = be16_to_cpu(sa_rec->pkey);
739 		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_PKEY,
740 			sizeof(val16), &val16);
741 	}
742 	if (comp_mask & IB_SA_PATH_REC_QOS_CLASS) {
743 		val16 = be16_to_cpu(sa_rec->qos_class);
744 		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_QOS_CLASS,
745 			sizeof(val16), &val16);
746 	}
747 }
748 
749 static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
750 {
751 	int len = 0;
752 
753 	if (comp_mask & IB_SA_PATH_REC_SERVICE_ID)
754 		len += nla_total_size(sizeof(u64));
755 	if (comp_mask & IB_SA_PATH_REC_DGID)
756 		len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
757 	if (comp_mask & IB_SA_PATH_REC_SGID)
758 		len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
759 	if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
760 		len += nla_total_size(sizeof(u8));
761 	if (comp_mask & IB_SA_PATH_REC_PKEY)
762 		len += nla_total_size(sizeof(u16));
763 	if (comp_mask & IB_SA_PATH_REC_QOS_CLASS)
764 		len += nla_total_size(sizeof(u16));
765 
766 	/*
767 	 * Make sure that at least some of the required comp_mask bits are
768 	 * set.
769 	 */
770 	if (WARN_ON(len == 0))
771 		return len;
772 
773 	/* Add the family header */
774 	len += NLMSG_ALIGN(sizeof(struct rdma_ls_resolve_header));
775 
776 	return len;
777 }
778 
779 static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
780 {
781 	struct sk_buff *skb = NULL;
782 	struct nlmsghdr *nlh;
783 	void *data;
784 	struct ib_sa_mad *mad;
785 	int len;
786 	unsigned long flags;
787 	unsigned long delay;
788 	gfp_t gfp_flag;
789 	int ret;
790 
791 	INIT_LIST_HEAD(&query->list);
792 	query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
793 
794 	mad = query->mad_buf->mad;
795 	len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask);
796 	if (len <= 0)
797 		return -EMSGSIZE;
798 
799 	skb = nlmsg_new(len, gfp_mask);
800 	if (!skb)
801 		return -ENOMEM;
802 
803 	/* Put nlmsg header only for now */
804 	data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS,
805 			    RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST);
806 	if (!data) {
807 		nlmsg_free(skb);
808 		return -EMSGSIZE;
809 	}
810 
811 	/* Add attributes */
812 	ib_nl_set_path_rec_attrs(skb, query);
813 
814 	/* Repair the nlmsg header length */
815 	nlmsg_end(skb, nlh);
816 
817 	gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC :
818 		GFP_NOWAIT;
819 
820 	spin_lock_irqsave(&ib_nl_request_lock, flags);
821 	ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_flag);
822 
823 	if (ret)
824 		goto out;
825 
826 	/* Put the request on the list.*/
827 	delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
828 	query->timeout = delay + jiffies;
829 	list_add_tail(&query->list, &ib_nl_request_list);
830 	/* Start the timeout if this is the only request */
831 	if (ib_nl_request_list.next == &query->list)
832 		queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
833 
834 out:
835 	spin_unlock_irqrestore(&ib_nl_request_lock, flags);
836 
837 	return ret;
838 }
839 
840 static int ib_nl_cancel_request(struct ib_sa_query *query)
841 {
842 	unsigned long flags;
843 	struct ib_sa_query *wait_query;
844 	int found = 0;
845 
846 	spin_lock_irqsave(&ib_nl_request_lock, flags);
847 	list_for_each_entry(wait_query, &ib_nl_request_list, list) {
848 		/* Let the timeout to take care of the callback */
849 		if (query == wait_query) {
850 			query->flags |= IB_SA_CANCEL;
851 			query->timeout = jiffies;
852 			list_move(&query->list, &ib_nl_request_list);
853 			found = 1;
854 			mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, 1);
855 			break;
856 		}
857 	}
858 	spin_unlock_irqrestore(&ib_nl_request_lock, flags);
859 
860 	return found;
861 }
862 
863 static void send_handler(struct ib_mad_agent *agent,
864 			 struct ib_mad_send_wc *mad_send_wc);
865 
866 static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query,
867 					   const struct nlmsghdr *nlh)
868 {
869 	struct ib_mad_send_wc mad_send_wc;
870 	struct ib_sa_mad *mad = NULL;
871 	const struct nlattr *head, *curr;
872 	struct ib_path_rec_data  *rec;
873 	int len, rem;
874 	u32 mask = 0;
875 	int status = -EIO;
876 
877 	if (query->callback) {
878 		head = (const struct nlattr *) nlmsg_data(nlh);
879 		len = nlmsg_len(nlh);
880 		switch (query->path_use) {
881 		case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
882 			mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
883 			break;
884 
885 		case LS_RESOLVE_PATH_USE_ALL:
886 		case LS_RESOLVE_PATH_USE_GMP:
887 		default:
888 			mask = IB_PATH_PRIMARY | IB_PATH_GMP |
889 				IB_PATH_BIDIRECTIONAL;
890 			break;
891 		}
892 		nla_for_each_attr(curr, head, len, rem) {
893 			if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) {
894 				rec = nla_data(curr);
895 				/*
896 				 * Get the first one. In the future, we may
897 				 * need to get up to 6 pathrecords.
898 				 */
899 				if ((rec->flags & mask) == mask) {
900 					mad = query->mad_buf->mad;
901 					mad->mad_hdr.method |=
902 						IB_MGMT_METHOD_RESP;
903 					memcpy(mad->data, rec->path_rec,
904 					       sizeof(rec->path_rec));
905 					status = 0;
906 					break;
907 				}
908 			}
909 		}
910 		query->callback(query, status, mad);
911 	}
912 
913 	mad_send_wc.send_buf = query->mad_buf;
914 	mad_send_wc.status = IB_WC_SUCCESS;
915 	send_handler(query->mad_buf->mad_agent, &mad_send_wc);
916 }
917 
918 static void ib_nl_request_timeout(struct work_struct *work)
919 {
920 	unsigned long flags;
921 	struct ib_sa_query *query;
922 	unsigned long delay;
923 	struct ib_mad_send_wc mad_send_wc;
924 	int ret;
925 
926 	spin_lock_irqsave(&ib_nl_request_lock, flags);
927 	while (!list_empty(&ib_nl_request_list)) {
928 		query = list_entry(ib_nl_request_list.next,
929 				   struct ib_sa_query, list);
930 
931 		if (time_after(query->timeout, jiffies)) {
932 			delay = query->timeout - jiffies;
933 			if ((long)delay <= 0)
934 				delay = 1;
935 			queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
936 			break;
937 		}
938 
939 		list_del(&query->list);
940 		ib_sa_disable_local_svc(query);
941 		/* Hold the lock to protect against query cancellation */
942 		if (ib_sa_query_cancelled(query))
943 			ret = -1;
944 		else
945 			ret = ib_post_send_mad(query->mad_buf, NULL);
946 		if (ret) {
947 			mad_send_wc.send_buf = query->mad_buf;
948 			mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
949 			spin_unlock_irqrestore(&ib_nl_request_lock, flags);
950 			send_handler(query->port->agent, &mad_send_wc);
951 			spin_lock_irqsave(&ib_nl_request_lock, flags);
952 		}
953 	}
954 	spin_unlock_irqrestore(&ib_nl_request_lock, flags);
955 }
956 
957 int ib_nl_handle_set_timeout(struct sk_buff *skb,
958 			     struct nlmsghdr *nlh,
959 			     struct netlink_ext_ack *extack)
960 {
961 	int timeout, delta, abs_delta;
962 	const struct nlattr *attr;
963 	unsigned long flags;
964 	struct ib_sa_query *query;
965 	long delay = 0;
966 	struct nlattr *tb[LS_NLA_TYPE_MAX];
967 	int ret;
968 
969 	if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
970 	    !(NETLINK_CB(skb).sk))
971 		return -EPERM;
972 
973 	ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
974 				   nlmsg_len(nlh), ib_nl_policy, NULL);
975 	attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT];
976 	if (ret || !attr)
977 		goto settimeout_out;
978 
979 	timeout = *(int *) nla_data(attr);
980 	if (timeout < IB_SA_LOCAL_SVC_TIMEOUT_MIN)
981 		timeout = IB_SA_LOCAL_SVC_TIMEOUT_MIN;
982 	if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX)
983 		timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX;
984 
985 	delta = timeout - sa_local_svc_timeout_ms;
986 	if (delta < 0)
987 		abs_delta = -delta;
988 	else
989 		abs_delta = delta;
990 
991 	if (delta != 0) {
992 		spin_lock_irqsave(&ib_nl_request_lock, flags);
993 		sa_local_svc_timeout_ms = timeout;
994 		list_for_each_entry(query, &ib_nl_request_list, list) {
995 			if (delta < 0 && abs_delta > query->timeout)
996 				query->timeout = 0;
997 			else
998 				query->timeout += delta;
999 
1000 			/* Get the new delay from the first entry */
1001 			if (!delay) {
1002 				delay = query->timeout - jiffies;
1003 				if (delay <= 0)
1004 					delay = 1;
1005 			}
1006 		}
1007 		if (delay)
1008 			mod_delayed_work(ib_nl_wq, &ib_nl_timed_work,
1009 					 (unsigned long)delay);
1010 		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1011 	}
1012 
1013 settimeout_out:
1014 	return 0;
1015 }
1016 
1017 static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
1018 {
1019 	struct nlattr *tb[LS_NLA_TYPE_MAX];
1020 	int ret;
1021 
1022 	if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
1023 		return 0;
1024 
1025 	ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
1026 				   nlmsg_len(nlh), ib_nl_policy, NULL);
1027 	if (ret)
1028 		return 0;
1029 
1030 	return 1;
1031 }
1032 
1033 int ib_nl_handle_resolve_resp(struct sk_buff *skb,
1034 			      struct nlmsghdr *nlh,
1035 			      struct netlink_ext_ack *extack)
1036 {
1037 	unsigned long flags;
1038 	struct ib_sa_query *query;
1039 	struct ib_mad_send_buf *send_buf;
1040 	struct ib_mad_send_wc mad_send_wc;
1041 	int found = 0;
1042 	int ret;
1043 
1044 	if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
1045 	    !(NETLINK_CB(skb).sk))
1046 		return -EPERM;
1047 
1048 	spin_lock_irqsave(&ib_nl_request_lock, flags);
1049 	list_for_each_entry(query, &ib_nl_request_list, list) {
1050 		/*
1051 		 * If the query is cancelled, let the timeout routine
1052 		 * take care of it.
1053 		 */
1054 		if (nlh->nlmsg_seq == query->seq) {
1055 			found = !ib_sa_query_cancelled(query);
1056 			if (found)
1057 				list_del(&query->list);
1058 			break;
1059 		}
1060 	}
1061 
1062 	if (!found) {
1063 		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1064 		goto resp_out;
1065 	}
1066 
1067 	send_buf = query->mad_buf;
1068 
1069 	if (!ib_nl_is_good_resolve_resp(nlh)) {
1070 		/* if the result is a failure, send out the packet via IB */
1071 		ib_sa_disable_local_svc(query);
1072 		ret = ib_post_send_mad(query->mad_buf, NULL);
1073 		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1074 		if (ret) {
1075 			mad_send_wc.send_buf = send_buf;
1076 			mad_send_wc.status = IB_WC_GENERAL_ERR;
1077 			send_handler(query->port->agent, &mad_send_wc);
1078 		}
1079 	} else {
1080 		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1081 		ib_nl_process_good_resolve_rsp(query, nlh);
1082 	}
1083 
1084 resp_out:
1085 	return 0;
1086 }
1087 
1088 static void free_sm_ah(struct kref *kref)
1089 {
1090 	struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
1091 
1092 	rdma_destroy_ah(sm_ah->ah, 0);
1093 	kfree(sm_ah);
1094 }
1095 
1096 void ib_sa_register_client(struct ib_sa_client *client)
1097 {
1098 	atomic_set(&client->users, 1);
1099 	init_completion(&client->comp);
1100 }
1101 EXPORT_SYMBOL(ib_sa_register_client);
1102 
1103 void ib_sa_unregister_client(struct ib_sa_client *client)
1104 {
1105 	ib_sa_client_put(client);
1106 	wait_for_completion(&client->comp);
1107 }
1108 EXPORT_SYMBOL(ib_sa_unregister_client);
1109 
1110 /**
1111  * ib_sa_cancel_query - try to cancel an SA query
1112  * @id:ID of query to cancel
1113  * @query:query pointer to cancel
1114  *
1115  * Try to cancel an SA query.  If the id and query don't match up or
1116  * the query has already completed, nothing is done.  Otherwise the
1117  * query is canceled and will complete with a status of -EINTR.
1118  */
1119 void ib_sa_cancel_query(int id, struct ib_sa_query *query)
1120 {
1121 	unsigned long flags;
1122 	struct ib_mad_send_buf *mad_buf;
1123 
1124 	xa_lock_irqsave(&queries, flags);
1125 	if (xa_load(&queries, id) != query) {
1126 		xa_unlock_irqrestore(&queries, flags);
1127 		return;
1128 	}
1129 	mad_buf = query->mad_buf;
1130 	xa_unlock_irqrestore(&queries, flags);
1131 
1132 	/*
1133 	 * If the query is still on the netlink request list, schedule
1134 	 * it to be cancelled by the timeout routine. Otherwise, it has been
1135 	 * sent to the MAD layer and has to be cancelled from there.
1136 	 */
1137 	if (!ib_nl_cancel_request(query))
1138 		ib_cancel_mad(mad_buf);
1139 }
1140 EXPORT_SYMBOL(ib_sa_cancel_query);
1141 
1142 static u8 get_src_path_mask(struct ib_device *device, u32 port_num)
1143 {
1144 	struct ib_sa_device *sa_dev;
1145 	struct ib_sa_port   *port;
1146 	unsigned long flags;
1147 	u8 src_path_mask;
1148 
1149 	sa_dev = ib_get_client_data(device, &sa_client);
1150 	if (!sa_dev)
1151 		return 0x7f;
1152 
1153 	port  = &sa_dev->port[port_num - sa_dev->start_port];
1154 	spin_lock_irqsave(&port->ah_lock, flags);
1155 	src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
1156 	spin_unlock_irqrestore(&port->ah_lock, flags);
1157 
1158 	return src_path_mask;
1159 }
1160 
1161 static int init_ah_attr_grh_fields(struct ib_device *device, u32 port_num,
1162 				   struct sa_path_rec *rec,
1163 				   struct rdma_ah_attr *ah_attr,
1164 				   const struct ib_gid_attr *gid_attr)
1165 {
1166 	enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
1167 
1168 	if (!gid_attr) {
1169 		gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
1170 						 port_num, NULL);
1171 		if (IS_ERR(gid_attr))
1172 			return PTR_ERR(gid_attr);
1173 	} else
1174 		rdma_hold_gid_attr(gid_attr);
1175 
1176 	rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
1177 				be32_to_cpu(rec->flow_label),
1178 				rec->hop_limit,	rec->traffic_class,
1179 				gid_attr);
1180 	return 0;
1181 }
1182 
1183 /**
1184  * ib_init_ah_attr_from_path - Initialize address handle attributes based on
1185  *   an SA path record.
1186  * @device: Device associated ah attributes initialization.
1187  * @port_num: Port on the specified device.
1188  * @rec: path record entry to use for ah attributes initialization.
1189  * @ah_attr: address handle attributes to initialization from path record.
1190  * @gid_attr: SGID attribute to consider during initialization.
1191  *
1192  * When ib_init_ah_attr_from_path() returns success,
1193  * (a) for IB link layer it optionally contains a reference to SGID attribute
1194  * when GRH is present for IB link layer.
1195  * (b) for RoCE link layer it contains a reference to SGID attribute.
1196  * User must invoke rdma_destroy_ah_attr() to release reference to SGID
1197  * attributes which are initialized using ib_init_ah_attr_from_path().
1198  */
1199 int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num,
1200 			      struct sa_path_rec *rec,
1201 			      struct rdma_ah_attr *ah_attr,
1202 			      const struct ib_gid_attr *gid_attr)
1203 {
1204 	int ret = 0;
1205 
1206 	memset(ah_attr, 0, sizeof(*ah_attr));
1207 	ah_attr->type = rdma_ah_find_type(device, port_num);
1208 	rdma_ah_set_sl(ah_attr, rec->sl);
1209 	rdma_ah_set_port_num(ah_attr, port_num);
1210 	rdma_ah_set_static_rate(ah_attr, rec->rate);
1211 
1212 	if (sa_path_is_roce(rec)) {
1213 		ret = roce_resolve_route_from_path(rec, gid_attr);
1214 		if (ret)
1215 			return ret;
1216 
1217 		memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN);
1218 	} else {
1219 		rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
1220 		if (sa_path_is_opa(rec) &&
1221 		    rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))
1222 			rdma_ah_set_make_grd(ah_attr, true);
1223 
1224 		rdma_ah_set_path_bits(ah_attr,
1225 				      be32_to_cpu(sa_path_get_slid(rec)) &
1226 				      get_src_path_mask(device, port_num));
1227 	}
1228 
1229 	if (rec->hop_limit > 0 || sa_path_is_roce(rec))
1230 		ret = init_ah_attr_grh_fields(device, port_num,
1231 					      rec, ah_attr, gid_attr);
1232 	return ret;
1233 }
1234 EXPORT_SYMBOL(ib_init_ah_attr_from_path);
1235 
1236 static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
1237 {
1238 	struct rdma_ah_attr ah_attr;
1239 	unsigned long flags;
1240 
1241 	spin_lock_irqsave(&query->port->ah_lock, flags);
1242 	if (!query->port->sm_ah) {
1243 		spin_unlock_irqrestore(&query->port->ah_lock, flags);
1244 		return -EAGAIN;
1245 	}
1246 	kref_get(&query->port->sm_ah->ref);
1247 	query->sm_ah = query->port->sm_ah;
1248 	spin_unlock_irqrestore(&query->port->ah_lock, flags);
1249 
1250 	/*
1251 	 * Always check if sm_ah has valid dlid assigned,
1252 	 * before querying for class port info
1253 	 */
1254 	if ((rdma_query_ah(query->sm_ah->ah, &ah_attr) < 0) ||
1255 	    !rdma_is_valid_unicast_lid(&ah_attr)) {
1256 		kref_put(&query->sm_ah->ref, free_sm_ah);
1257 		return -EAGAIN;
1258 	}
1259 	query->mad_buf = ib_create_send_mad(query->port->agent, 1,
1260 					    query->sm_ah->pkey_index,
1261 					    0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
1262 					    gfp_mask,
1263 					    ((query->flags & IB_SA_QUERY_OPA) ?
1264 					     OPA_MGMT_BASE_VERSION :
1265 					     IB_MGMT_BASE_VERSION));
1266 	if (IS_ERR(query->mad_buf)) {
1267 		kref_put(&query->sm_ah->ref, free_sm_ah);
1268 		return -ENOMEM;
1269 	}
1270 
1271 	query->mad_buf->ah = query->sm_ah->ah;
1272 
1273 	return 0;
1274 }
1275 
1276 static void free_mad(struct ib_sa_query *query)
1277 {
1278 	ib_free_send_mad(query->mad_buf);
1279 	kref_put(&query->sm_ah->ref, free_sm_ah);
1280 }
1281 
1282 static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
1283 {
1284 	struct ib_sa_mad *mad = query->mad_buf->mad;
1285 	unsigned long flags;
1286 
1287 	memset(mad, 0, sizeof *mad);
1288 
1289 	if (query->flags & IB_SA_QUERY_OPA) {
1290 		mad->mad_hdr.base_version  = OPA_MGMT_BASE_VERSION;
1291 		mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION;
1292 	} else {
1293 		mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
1294 		mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
1295 	}
1296 	mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_SUBN_ADM;
1297 	spin_lock_irqsave(&tid_lock, flags);
1298 	mad->mad_hdr.tid           =
1299 		cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
1300 	spin_unlock_irqrestore(&tid_lock, flags);
1301 }
1302 
1303 static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
1304 		    gfp_t gfp_mask)
1305 {
1306 	unsigned long flags;
1307 	int ret, id;
1308 	const int nmbr_sa_query_retries = 10;
1309 
1310 	xa_lock_irqsave(&queries, flags);
1311 	ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask);
1312 	xa_unlock_irqrestore(&queries, flags);
1313 	if (ret < 0)
1314 		return ret;
1315 
1316 	query->mad_buf->timeout_ms  = timeout_ms / nmbr_sa_query_retries;
1317 	query->mad_buf->retries = nmbr_sa_query_retries;
1318 	if (!query->mad_buf->timeout_ms) {
1319 		/* Special case, very small timeout_ms */
1320 		query->mad_buf->timeout_ms = 1;
1321 		query->mad_buf->retries = timeout_ms;
1322 	}
1323 	query->mad_buf->context[0] = query;
1324 	query->id = id;
1325 
1326 	if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
1327 	    (!(query->flags & IB_SA_QUERY_OPA))) {
1328 		if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
1329 			if (!ib_nl_make_request(query, gfp_mask))
1330 				return id;
1331 		}
1332 		ib_sa_disable_local_svc(query);
1333 	}
1334 
1335 	ret = ib_post_send_mad(query->mad_buf, NULL);
1336 	if (ret) {
1337 		xa_lock_irqsave(&queries, flags);
1338 		__xa_erase(&queries, id);
1339 		xa_unlock_irqrestore(&queries, flags);
1340 	}
1341 
1342 	/*
1343 	 * It's not safe to dereference query any more, because the
1344 	 * send may already have completed and freed the query in
1345 	 * another context.
1346 	 */
1347 	return ret ? ret : id;
1348 }
1349 
1350 void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec)
1351 {
1352 	ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
1353 }
1354 EXPORT_SYMBOL(ib_sa_unpack_path);
1355 
1356 void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
1357 {
1358 	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
1359 }
1360 EXPORT_SYMBOL(ib_sa_pack_path);
1361 
1362 static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
1363 					 struct ib_sa_device *sa_dev,
1364 					 u32 port_num)
1365 {
1366 	struct ib_sa_port *port;
1367 	unsigned long flags;
1368 	bool ret = false;
1369 
1370 	port = &sa_dev->port[port_num - sa_dev->start_port];
1371 	spin_lock_irqsave(&port->classport_lock, flags);
1372 	if (!port->classport_info.valid)
1373 		goto ret;
1374 
1375 	if (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_OPA)
1376 		ret = opa_get_cpi_capmask2(&port->classport_info.data.opa) &
1377 			OPA_CLASS_PORT_INFO_PR_SUPPORT;
1378 ret:
1379 	spin_unlock_irqrestore(&port->classport_lock, flags);
1380 	return ret;
1381 }
1382 
/* Kind of path record query that can be issued on a port. */
enum opa_pr_supported {
	PR_NOT_SUPPORTED = 0,	/* no path record query is possible */
	PR_OPA_SUPPORTED = 1,	/* an OPA path record query is possible */
	PR_IB_SUPPORTED  = 2	/* an IB path record query is possible */
};
1388 
1389 /*
1390  * opa_pr_query_possible - Check if current PR query can be an OPA query.
1391  *
1392  * Retuns PR_NOT_SUPPORTED if a path record query is not
1393  * possible, PR_OPA_SUPPORTED if an OPA path record query
1394  * is possible and PR_IB_SUPPORTED if an IB path record
1395  * query is possible.
1396  */
1397 static int opa_pr_query_possible(struct ib_sa_client *client,
1398 				 struct ib_sa_device *sa_dev,
1399 				 struct ib_device *device, u32 port_num)
1400 {
1401 	struct ib_port_attr port_attr;
1402 
1403 	if (ib_query_port(device, port_num, &port_attr))
1404 		return PR_NOT_SUPPORTED;
1405 
1406 	if (ib_sa_opa_pathrecord_support(client, sa_dev, port_num))
1407 		return PR_OPA_SUPPORTED;
1408 
1409 	if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
1410 		return PR_NOT_SUPPORTED;
1411 	else
1412 		return PR_IB_SUPPORTED;
1413 }
1414 
1415 static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
1416 				    int status,
1417 				    struct ib_sa_mad *mad)
1418 {
1419 	struct ib_sa_path_query *query =
1420 		container_of(sa_query, struct ib_sa_path_query, sa_query);
1421 
1422 	if (mad) {
1423 		struct sa_path_rec rec;
1424 
1425 		if (sa_query->flags & IB_SA_QUERY_OPA) {
1426 			ib_unpack(opa_path_rec_table,
1427 				  ARRAY_SIZE(opa_path_rec_table),
1428 				  mad->data, &rec);
1429 			rec.rec_type = SA_PATH_REC_TYPE_OPA;
1430 			query->callback(status, &rec, query->context);
1431 		} else {
1432 			ib_unpack(path_rec_table,
1433 				  ARRAY_SIZE(path_rec_table),
1434 				  mad->data, &rec);
1435 			rec.rec_type = SA_PATH_REC_TYPE_IB;
1436 			sa_path_set_dmac_zero(&rec);
1437 
1438 			if (query->conv_pr) {
1439 				struct sa_path_rec opa;
1440 
1441 				memset(&opa, 0, sizeof(struct sa_path_rec));
1442 				sa_convert_path_ib_to_opa(&opa, &rec);
1443 				query->callback(status, &opa, query->context);
1444 			} else {
1445 				query->callback(status, &rec, query->context);
1446 			}
1447 		}
1448 	} else
1449 		query->callback(status, NULL, query->context);
1450 }
1451 
1452 static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
1453 {
1454 	struct ib_sa_path_query *query =
1455 		container_of(sa_query, struct ib_sa_path_query, sa_query);
1456 
1457 	kfree(query->conv_pr);
1458 	kfree(query);
1459 }
1460 
1461 /**
1462  * ib_sa_path_rec_get - Start a Path get query
1463  * @client:SA client
1464  * @device:device to send query on
1465  * @port_num: port number to send query on
1466  * @rec:Path Record to send in query
1467  * @comp_mask:component mask to send in query
1468  * @timeout_ms:time to wait for response
1469  * @gfp_mask:GFP mask to use for internal allocations
1470  * @callback:function called when query completes, times out or is
1471  * canceled
1472  * @context:opaque user context passed to callback
1473  * @sa_query:query context, used to cancel query
1474  *
1475  * Send a Path Record Get query to the SA to look up a path.  The
1476  * callback function will be called when the query completes (or
1477  * fails); status is 0 for a successful response, -EINTR if the query
1478  * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
1479  * occurred sending the query.  The resp parameter of the callback is
1480  * only valid if status is 0.
1481  *
1482  * If the return value of ib_sa_path_rec_get() is negative, it is an
1483  * error code.  Otherwise it is a query ID that can be used to cancel
1484  * the query.
1485  */
1486 int ib_sa_path_rec_get(struct ib_sa_client *client,
1487 		       struct ib_device *device, u32 port_num,
1488 		       struct sa_path_rec *rec,
1489 		       ib_sa_comp_mask comp_mask,
1490 		       unsigned long timeout_ms, gfp_t gfp_mask,
1491 		       void (*callback)(int status,
1492 					struct sa_path_rec *resp,
1493 					void *context),
1494 		       void *context,
1495 		       struct ib_sa_query **sa_query)
1496 {
1497 	struct ib_sa_path_query *query;
1498 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1499 	struct ib_sa_port   *port;
1500 	struct ib_mad_agent *agent;
1501 	struct ib_sa_mad *mad;
1502 	enum opa_pr_supported status;
1503 	int ret;
1504 
1505 	if (!sa_dev)
1506 		return -ENODEV;
1507 
1508 	if ((rec->rec_type != SA_PATH_REC_TYPE_IB) &&
1509 	    (rec->rec_type != SA_PATH_REC_TYPE_OPA))
1510 		return -EINVAL;
1511 
1512 	port  = &sa_dev->port[port_num - sa_dev->start_port];
1513 	agent = port->agent;
1514 
1515 	query = kzalloc(sizeof(*query), gfp_mask);
1516 	if (!query)
1517 		return -ENOMEM;
1518 
1519 	query->sa_query.port     = port;
1520 	if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
1521 		status = opa_pr_query_possible(client, sa_dev, device, port_num);
1522 		if (status == PR_NOT_SUPPORTED) {
1523 			ret = -EINVAL;
1524 			goto err1;
1525 		} else if (status == PR_OPA_SUPPORTED) {
1526 			query->sa_query.flags |= IB_SA_QUERY_OPA;
1527 		} else {
1528 			query->conv_pr =
1529 				kmalloc(sizeof(*query->conv_pr), gfp_mask);
1530 			if (!query->conv_pr) {
1531 				ret = -ENOMEM;
1532 				goto err1;
1533 			}
1534 		}
1535 	}
1536 
1537 	ret = alloc_mad(&query->sa_query, gfp_mask);
1538 	if (ret)
1539 		goto err2;
1540 
1541 	ib_sa_client_get(client);
1542 	query->sa_query.client = client;
1543 	query->callback        = callback;
1544 	query->context         = context;
1545 
1546 	mad = query->sa_query.mad_buf->mad;
1547 	init_mad(&query->sa_query, agent);
1548 
1549 	query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
1550 	query->sa_query.release  = ib_sa_path_rec_release;
1551 	mad->mad_hdr.method	 = IB_MGMT_METHOD_GET;
1552 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_PATH_REC);
1553 	mad->sa_hdr.comp_mask	 = comp_mask;
1554 
1555 	if (query->sa_query.flags & IB_SA_QUERY_OPA) {
1556 		ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
1557 			rec, mad->data);
1558 	} else if (query->conv_pr) {
1559 		sa_convert_path_opa_to_ib(query->conv_pr, rec);
1560 		ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
1561 			query->conv_pr, mad->data);
1562 	} else {
1563 		ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
1564 			rec, mad->data);
1565 	}
1566 
1567 	*sa_query = &query->sa_query;
1568 
1569 	query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE;
1570 	query->sa_query.mad_buf->context[1] = (query->conv_pr) ?
1571 						query->conv_pr : rec;
1572 
1573 	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1574 	if (ret < 0)
1575 		goto err3;
1576 
1577 	return ret;
1578 
1579 err3:
1580 	*sa_query = NULL;
1581 	ib_sa_client_put(query->sa_query.client);
1582 	free_mad(&query->sa_query);
1583 err2:
1584 	kfree(query->conv_pr);
1585 err1:
1586 	kfree(query);
1587 	return ret;
1588 }
1589 EXPORT_SYMBOL(ib_sa_path_rec_get);
1590 
1591 static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
1592 					int status,
1593 					struct ib_sa_mad *mad)
1594 {
1595 	struct ib_sa_mcmember_query *query =
1596 		container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
1597 
1598 	if (mad) {
1599 		struct ib_sa_mcmember_rec rec;
1600 
1601 		ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
1602 			  mad->data, &rec);
1603 		query->callback(status, &rec, query->context);
1604 	} else
1605 		query->callback(status, NULL, query->context);
1606 }
1607 
1608 static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
1609 {
1610 	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
1611 }
1612 
1613 int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
1614 			     struct ib_device *device, u32 port_num,
1615 			     u8 method,
1616 			     struct ib_sa_mcmember_rec *rec,
1617 			     ib_sa_comp_mask comp_mask,
1618 			     unsigned long timeout_ms, gfp_t gfp_mask,
1619 			     void (*callback)(int status,
1620 					      struct ib_sa_mcmember_rec *resp,
1621 					      void *context),
1622 			     void *context,
1623 			     struct ib_sa_query **sa_query)
1624 {
1625 	struct ib_sa_mcmember_query *query;
1626 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1627 	struct ib_sa_port   *port;
1628 	struct ib_mad_agent *agent;
1629 	struct ib_sa_mad *mad;
1630 	int ret;
1631 
1632 	if (!sa_dev)
1633 		return -ENODEV;
1634 
1635 	port  = &sa_dev->port[port_num - sa_dev->start_port];
1636 	agent = port->agent;
1637 
1638 	query = kzalloc(sizeof(*query), gfp_mask);
1639 	if (!query)
1640 		return -ENOMEM;
1641 
1642 	query->sa_query.port     = port;
1643 	ret = alloc_mad(&query->sa_query, gfp_mask);
1644 	if (ret)
1645 		goto err1;
1646 
1647 	ib_sa_client_get(client);
1648 	query->sa_query.client = client;
1649 	query->callback        = callback;
1650 	query->context         = context;
1651 
1652 	mad = query->sa_query.mad_buf->mad;
1653 	init_mad(&query->sa_query, agent);
1654 
1655 	query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
1656 	query->sa_query.release  = ib_sa_mcmember_rec_release;
1657 	mad->mad_hdr.method	 = method;
1658 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
1659 	mad->sa_hdr.comp_mask	 = comp_mask;
1660 
1661 	ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
1662 		rec, mad->data);
1663 
1664 	*sa_query = &query->sa_query;
1665 
1666 	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1667 	if (ret < 0)
1668 		goto err2;
1669 
1670 	return ret;
1671 
1672 err2:
1673 	*sa_query = NULL;
1674 	ib_sa_client_put(query->sa_query.client);
1675 	free_mad(&query->sa_query);
1676 
1677 err1:
1678 	kfree(query);
1679 	return ret;
1680 }
1681 
1682 /* Support GuidInfoRecord */
1683 static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
1684 					int status,
1685 					struct ib_sa_mad *mad)
1686 {
1687 	struct ib_sa_guidinfo_query *query =
1688 		container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
1689 
1690 	if (mad) {
1691 		struct ib_sa_guidinfo_rec rec;
1692 
1693 		ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table),
1694 			  mad->data, &rec);
1695 		query->callback(status, &rec, query->context);
1696 	} else
1697 		query->callback(status, NULL, query->context);
1698 }
1699 
1700 static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
1701 {
1702 	kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query));
1703 }
1704 
1705 int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
1706 			      struct ib_device *device, u32 port_num,
1707 			      struct ib_sa_guidinfo_rec *rec,
1708 			      ib_sa_comp_mask comp_mask, u8 method,
1709 			      unsigned long timeout_ms, gfp_t gfp_mask,
1710 			      void (*callback)(int status,
1711 					       struct ib_sa_guidinfo_rec *resp,
1712 					       void *context),
1713 			      void *context,
1714 			      struct ib_sa_query **sa_query)
1715 {
1716 	struct ib_sa_guidinfo_query *query;
1717 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1718 	struct ib_sa_port *port;
1719 	struct ib_mad_agent *agent;
1720 	struct ib_sa_mad *mad;
1721 	int ret;
1722 
1723 	if (!sa_dev)
1724 		return -ENODEV;
1725 
1726 	if (method != IB_MGMT_METHOD_GET &&
1727 	    method != IB_MGMT_METHOD_SET &&
1728 	    method != IB_SA_METHOD_DELETE) {
1729 		return -EINVAL;
1730 	}
1731 
1732 	port  = &sa_dev->port[port_num - sa_dev->start_port];
1733 	agent = port->agent;
1734 
1735 	query = kzalloc(sizeof(*query), gfp_mask);
1736 	if (!query)
1737 		return -ENOMEM;
1738 
1739 	query->sa_query.port = port;
1740 	ret = alloc_mad(&query->sa_query, gfp_mask);
1741 	if (ret)
1742 		goto err1;
1743 
1744 	ib_sa_client_get(client);
1745 	query->sa_query.client = client;
1746 	query->callback        = callback;
1747 	query->context         = context;
1748 
1749 	mad = query->sa_query.mad_buf->mad;
1750 	init_mad(&query->sa_query, agent);
1751 
1752 	query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
1753 	query->sa_query.release  = ib_sa_guidinfo_rec_release;
1754 
1755 	mad->mad_hdr.method	 = method;
1756 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC);
1757 	mad->sa_hdr.comp_mask	 = comp_mask;
1758 
1759 	ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec,
1760 		mad->data);
1761 
1762 	*sa_query = &query->sa_query;
1763 
1764 	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1765 	if (ret < 0)
1766 		goto err2;
1767 
1768 	return ret;
1769 
1770 err2:
1771 	*sa_query = NULL;
1772 	ib_sa_client_put(query->sa_query.client);
1773 	free_mad(&query->sa_query);
1774 
1775 err1:
1776 	kfree(query);
1777 	return ret;
1778 }
1779 EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
1780 
1781 struct ib_classport_info_context {
1782 	struct completion	done;
1783 	struct ib_sa_query	*sa_query;
1784 };
1785 
1786 static void ib_classportinfo_cb(void *context)
1787 {
1788 	struct ib_classport_info_context *cb_ctx = context;
1789 
1790 	complete(&cb_ctx->done);
1791 }
1792 
1793 static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
1794 					      int status,
1795 					      struct ib_sa_mad *mad)
1796 {
1797 	unsigned long flags;
1798 	struct ib_sa_classport_info_query *query =
1799 		container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
1800 	struct ib_sa_classport_cache *info = &sa_query->port->classport_info;
1801 
1802 	if (mad) {
1803 		if (sa_query->flags & IB_SA_QUERY_OPA) {
1804 			struct opa_class_port_info rec;
1805 
1806 			ib_unpack(opa_classport_info_rec_table,
1807 				  ARRAY_SIZE(opa_classport_info_rec_table),
1808 				  mad->data, &rec);
1809 
1810 			spin_lock_irqsave(&sa_query->port->classport_lock,
1811 					  flags);
1812 			if (!status && !info->valid) {
1813 				memcpy(&info->data.opa, &rec,
1814 				       sizeof(info->data.opa));
1815 
1816 				info->valid = true;
1817 				info->data.type = RDMA_CLASS_PORT_INFO_OPA;
1818 			}
1819 			spin_unlock_irqrestore(&sa_query->port->classport_lock,
1820 					       flags);
1821 
1822 		} else {
1823 			struct ib_class_port_info rec;
1824 
1825 			ib_unpack(ib_classport_info_rec_table,
1826 				  ARRAY_SIZE(ib_classport_info_rec_table),
1827 				  mad->data, &rec);
1828 
1829 			spin_lock_irqsave(&sa_query->port->classport_lock,
1830 					  flags);
1831 			if (!status && !info->valid) {
1832 				memcpy(&info->data.ib, &rec,
1833 				       sizeof(info->data.ib));
1834 
1835 				info->valid = true;
1836 				info->data.type = RDMA_CLASS_PORT_INFO_IB;
1837 			}
1838 			spin_unlock_irqrestore(&sa_query->port->classport_lock,
1839 					       flags);
1840 		}
1841 	}
1842 	query->callback(query->context);
1843 }
1844 
1845 static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
1846 {
1847 	kfree(container_of(sa_query, struct ib_sa_classport_info_query,
1848 			   sa_query));
1849 }
1850 
1851 static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
1852 					  unsigned long timeout_ms,
1853 					  void (*callback)(void *context),
1854 					  void *context,
1855 					  struct ib_sa_query **sa_query)
1856 {
1857 	struct ib_mad_agent *agent;
1858 	struct ib_sa_classport_info_query *query;
1859 	struct ib_sa_mad *mad;
1860 	gfp_t gfp_mask = GFP_KERNEL;
1861 	int ret;
1862 
1863 	agent = port->agent;
1864 
1865 	query = kzalloc(sizeof(*query), gfp_mask);
1866 	if (!query)
1867 		return -ENOMEM;
1868 
1869 	query->sa_query.port = port;
1870 	query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device,
1871 						 port->port_num) ?
1872 				 IB_SA_QUERY_OPA : 0;
1873 	ret = alloc_mad(&query->sa_query, gfp_mask);
1874 	if (ret)
1875 		goto err_free;
1876 
1877 	query->callback = callback;
1878 	query->context = context;
1879 
1880 	mad = query->sa_query.mad_buf->mad;
1881 	init_mad(&query->sa_query, agent);
1882 
1883 	query->sa_query.callback = ib_sa_classport_info_rec_callback;
1884 	query->sa_query.release  = ib_sa_classport_info_rec_release;
1885 	mad->mad_hdr.method	 = IB_MGMT_METHOD_GET;
1886 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
1887 	mad->sa_hdr.comp_mask	 = 0;
1888 	*sa_query = &query->sa_query;
1889 
1890 	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1891 	if (ret < 0)
1892 		goto err_free_mad;
1893 
1894 	return ret;
1895 
1896 err_free_mad:
1897 	*sa_query = NULL;
1898 	free_mad(&query->sa_query);
1899 
1900 err_free:
1901 	kfree(query);
1902 	return ret;
1903 }
1904 
1905 static void update_ib_cpi(struct work_struct *work)
1906 {
1907 	struct ib_sa_port *port =
1908 		container_of(work, struct ib_sa_port, ib_cpi_work.work);
1909 	struct ib_classport_info_context *cb_context;
1910 	unsigned long flags;
1911 	int ret;
1912 
1913 	/* If the classport info is valid, nothing
1914 	 * to do here.
1915 	 */
1916 	spin_lock_irqsave(&port->classport_lock, flags);
1917 	if (port->classport_info.valid) {
1918 		spin_unlock_irqrestore(&port->classport_lock, flags);
1919 		return;
1920 	}
1921 	spin_unlock_irqrestore(&port->classport_lock, flags);
1922 
1923 	cb_context = kmalloc(sizeof(*cb_context), GFP_KERNEL);
1924 	if (!cb_context)
1925 		goto err_nomem;
1926 
1927 	init_completion(&cb_context->done);
1928 
1929 	ret = ib_sa_classport_info_rec_query(port, 3000,
1930 					     ib_classportinfo_cb, cb_context,
1931 					     &cb_context->sa_query);
1932 	if (ret < 0)
1933 		goto free_cb_err;
1934 	wait_for_completion(&cb_context->done);
1935 free_cb_err:
1936 	kfree(cb_context);
1937 	spin_lock_irqsave(&port->classport_lock, flags);
1938 
1939 	/* If the classport info is still not valid, the query should have
1940 	 * failed for some reason. Retry issuing the query
1941 	 */
1942 	if (!port->classport_info.valid) {
1943 		port->classport_info.retry_cnt++;
1944 		if (port->classport_info.retry_cnt <=
1945 		    IB_SA_CPI_MAX_RETRY_CNT) {
1946 			unsigned long delay =
1947 				msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
1948 
1949 			queue_delayed_work(ib_wq, &port->ib_cpi_work, delay);
1950 		}
1951 	}
1952 	spin_unlock_irqrestore(&port->classport_lock, flags);
1953 
1954 err_nomem:
1955 	return;
1956 }
1957 
1958 static void send_handler(struct ib_mad_agent *agent,
1959 			 struct ib_mad_send_wc *mad_send_wc)
1960 {
1961 	struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
1962 	unsigned long flags;
1963 
1964 	if (query->callback)
1965 		switch (mad_send_wc->status) {
1966 		case IB_WC_SUCCESS:
1967 			/* No callback -- already got recv */
1968 			break;
1969 		case IB_WC_RESP_TIMEOUT_ERR:
1970 			query->callback(query, -ETIMEDOUT, NULL);
1971 			break;
1972 		case IB_WC_WR_FLUSH_ERR:
1973 			query->callback(query, -EINTR, NULL);
1974 			break;
1975 		default:
1976 			query->callback(query, -EIO, NULL);
1977 			break;
1978 		}
1979 
1980 	xa_lock_irqsave(&queries, flags);
1981 	__xa_erase(&queries, query->id);
1982 	xa_unlock_irqrestore(&queries, flags);
1983 
1984 	free_mad(query);
1985 	if (query->client)
1986 		ib_sa_client_put(query->client);
1987 	query->release(query);
1988 }
1989 
1990 static void recv_handler(struct ib_mad_agent *mad_agent,
1991 			 struct ib_mad_send_buf *send_buf,
1992 			 struct ib_mad_recv_wc *mad_recv_wc)
1993 {
1994 	struct ib_sa_query *query;
1995 
1996 	if (!send_buf)
1997 		return;
1998 
1999 	query = send_buf->context[0];
2000 	if (query->callback) {
2001 		if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
2002 			query->callback(query,
2003 					mad_recv_wc->recv_buf.mad->mad_hdr.status ?
2004 					-EINVAL : 0,
2005 					(struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
2006 		else
2007 			query->callback(query, -EIO, NULL);
2008 	}
2009 
2010 	ib_free_recv_mad(mad_recv_wc);
2011 }
2012 
2013 static void update_sm_ah(struct work_struct *work)
2014 {
2015 	struct ib_sa_port *port =
2016 		container_of(work, struct ib_sa_port, update_task);
2017 	struct ib_sa_sm_ah *new_ah;
2018 	struct ib_port_attr port_attr;
2019 	struct rdma_ah_attr   ah_attr;
2020 	bool grh_required;
2021 
2022 	if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
2023 		pr_warn("Couldn't query port\n");
2024 		return;
2025 	}
2026 
2027 	new_ah = kmalloc(sizeof(*new_ah), GFP_KERNEL);
2028 	if (!new_ah)
2029 		return;
2030 
2031 	kref_init(&new_ah->ref);
2032 	new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
2033 
2034 	new_ah->pkey_index = 0;
2035 	if (ib_find_pkey(port->agent->device, port->port_num,
2036 			 IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
2037 		pr_err("Couldn't find index for default PKey\n");
2038 
2039 	memset(&ah_attr, 0, sizeof(ah_attr));
2040 	ah_attr.type = rdma_ah_find_type(port->agent->device,
2041 					 port->port_num);
2042 	rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
2043 	rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
2044 	rdma_ah_set_port_num(&ah_attr, port->port_num);
2045 
2046 	grh_required = rdma_is_grh_required(port->agent->device,
2047 					    port->port_num);
2048 
2049 	/*
2050 	 * The OPA sm_lid of 0xFFFF needs special handling so that it can be
2051 	 * differentiated from a permissive LID of 0xFFFF.  We set the
2052 	 * grh_required flag here so the SA can program the DGID in the
2053 	 * address handle appropriately
2054 	 */
2055 	if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA &&
2056 	    (grh_required ||
2057 	     port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)))
2058 		rdma_ah_set_make_grd(&ah_attr, true);
2059 
2060 	if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) {
2061 		rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
2062 		rdma_ah_set_subnet_prefix(&ah_attr,
2063 					  cpu_to_be64(port_attr.subnet_prefix));
2064 		rdma_ah_set_interface_id(&ah_attr,
2065 					 cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
2066 	}
2067 
2068 	new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr,
2069 				    RDMA_CREATE_AH_SLEEPABLE);
2070 	if (IS_ERR(new_ah->ah)) {
2071 		pr_warn("Couldn't create new SM AH\n");
2072 		kfree(new_ah);
2073 		return;
2074 	}
2075 
2076 	spin_lock_irq(&port->ah_lock);
2077 	if (port->sm_ah)
2078 		kref_put(&port->sm_ah->ref, free_sm_ah);
2079 	port->sm_ah = new_ah;
2080 	spin_unlock_irq(&port->ah_lock);
2081 }
2082 
2083 static void ib_sa_event(struct ib_event_handler *handler,
2084 			struct ib_event *event)
2085 {
2086 	if (event->event == IB_EVENT_PORT_ERR    ||
2087 	    event->event == IB_EVENT_PORT_ACTIVE ||
2088 	    event->event == IB_EVENT_LID_CHANGE  ||
2089 	    event->event == IB_EVENT_PKEY_CHANGE ||
2090 	    event->event == IB_EVENT_SM_CHANGE   ||
2091 	    event->event == IB_EVENT_CLIENT_REREGISTER) {
2092 		unsigned long flags;
2093 		struct ib_sa_device *sa_dev =
2094 			container_of(handler, typeof(*sa_dev), event_handler);
2095 		u32 port_num = event->element.port_num - sa_dev->start_port;
2096 		struct ib_sa_port *port = &sa_dev->port[port_num];
2097 
2098 		if (!rdma_cap_ib_sa(handler->device, port->port_num))
2099 			return;
2100 
2101 		spin_lock_irqsave(&port->ah_lock, flags);
2102 		if (port->sm_ah)
2103 			kref_put(&port->sm_ah->ref, free_sm_ah);
2104 		port->sm_ah = NULL;
2105 		spin_unlock_irqrestore(&port->ah_lock, flags);
2106 
2107 		if (event->event == IB_EVENT_SM_CHANGE ||
2108 		    event->event == IB_EVENT_CLIENT_REREGISTER ||
2109 		    event->event == IB_EVENT_LID_CHANGE ||
2110 		    event->event == IB_EVENT_PORT_ACTIVE) {
2111 			unsigned long delay =
2112 				msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
2113 
2114 			spin_lock_irqsave(&port->classport_lock, flags);
2115 			port->classport_info.valid = false;
2116 			port->classport_info.retry_cnt = 0;
2117 			spin_unlock_irqrestore(&port->classport_lock, flags);
2118 			queue_delayed_work(ib_wq,
2119 					   &port->ib_cpi_work, delay);
2120 		}
2121 		queue_work(ib_wq, &sa_dev->port[port_num].update_task);
2122 	}
2123 }
2124 
2125 static int ib_sa_add_one(struct ib_device *device)
2126 {
2127 	struct ib_sa_device *sa_dev;
2128 	int s, e, i;
2129 	int count = 0;
2130 	int ret;
2131 
2132 	s = rdma_start_port(device);
2133 	e = rdma_end_port(device);
2134 
2135 	sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL);
2136 	if (!sa_dev)
2137 		return -ENOMEM;
2138 
2139 	sa_dev->start_port = s;
2140 	sa_dev->end_port   = e;
2141 
2142 	for (i = 0; i <= e - s; ++i) {
2143 		spin_lock_init(&sa_dev->port[i].ah_lock);
2144 		if (!rdma_cap_ib_sa(device, i + 1))
2145 			continue;
2146 
2147 		sa_dev->port[i].sm_ah    = NULL;
2148 		sa_dev->port[i].port_num = i + s;
2149 
2150 		spin_lock_init(&sa_dev->port[i].classport_lock);
2151 		sa_dev->port[i].classport_info.valid = false;
2152 
2153 		sa_dev->port[i].agent =
2154 			ib_register_mad_agent(device, i + s, IB_QPT_GSI,
2155 					      NULL, 0, send_handler,
2156 					      recv_handler, sa_dev, 0);
2157 		if (IS_ERR(sa_dev->port[i].agent)) {
2158 			ret = PTR_ERR(sa_dev->port[i].agent);
2159 			goto err;
2160 		}
2161 
2162 		INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
2163 		INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
2164 				  update_ib_cpi);
2165 
2166 		count++;
2167 	}
2168 
2169 	if (!count) {
2170 		ret = -EOPNOTSUPP;
2171 		goto free;
2172 	}
2173 
2174 	ib_set_client_data(device, &sa_client, sa_dev);
2175 
2176 	/*
2177 	 * We register our event handler after everything is set up,
2178 	 * and then update our cached info after the event handler is
2179 	 * registered to avoid any problems if a port changes state
2180 	 * during our initialization.
2181 	 */
2182 
2183 	INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
2184 	ib_register_event_handler(&sa_dev->event_handler);
2185 
2186 	for (i = 0; i <= e - s; ++i) {
2187 		if (rdma_cap_ib_sa(device, i + 1))
2188 			update_sm_ah(&sa_dev->port[i].update_task);
2189 	}
2190 
2191 	return 0;
2192 
2193 err:
2194 	while (--i >= 0) {
2195 		if (rdma_cap_ib_sa(device, i + 1))
2196 			ib_unregister_mad_agent(sa_dev->port[i].agent);
2197 	}
2198 free:
2199 	kfree(sa_dev);
2200 	return ret;
2201 }
2202 
2203 static void ib_sa_remove_one(struct ib_device *device, void *client_data)
2204 {
2205 	struct ib_sa_device *sa_dev = client_data;
2206 	int i;
2207 
2208 	ib_unregister_event_handler(&sa_dev->event_handler);
2209 	flush_workqueue(ib_wq);
2210 
2211 	for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
2212 		if (rdma_cap_ib_sa(device, i + 1)) {
2213 			cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work);
2214 			ib_unregister_mad_agent(sa_dev->port[i].agent);
2215 			if (sa_dev->port[i].sm_ah)
2216 				kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
2217 		}
2218 
2219 	}
2220 
2221 	kfree(sa_dev);
2222 }
2223 
2224 int ib_sa_init(void)
2225 {
2226 	int ret;
2227 
2228 	get_random_bytes(&tid, sizeof tid);
2229 
2230 	atomic_set(&ib_nl_sa_request_seq, 0);
2231 
2232 	ret = ib_register_client(&sa_client);
2233 	if (ret) {
2234 		pr_err("Couldn't register ib_sa client\n");
2235 		goto err1;
2236 	}
2237 
2238 	ret = mcast_init();
2239 	if (ret) {
2240 		pr_err("Couldn't initialize multicast handling\n");
2241 		goto err2;
2242 	}
2243 
2244 	ib_nl_wq = alloc_ordered_workqueue("ib_nl_sa_wq", WQ_MEM_RECLAIM);
2245 	if (!ib_nl_wq) {
2246 		ret = -ENOMEM;
2247 		goto err3;
2248 	}
2249 
2250 	INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout);
2251 
2252 	return 0;
2253 
2254 err3:
2255 	mcast_cleanup();
2256 err2:
2257 	ib_unregister_client(&sa_client);
2258 err1:
2259 	return ret;
2260 }
2261 
2262 void ib_sa_cleanup(void)
2263 {
2264 	cancel_delayed_work(&ib_nl_timed_work);
2265 	destroy_workqueue(ib_nl_wq);
2266 	mcast_cleanup();
2267 	ib_unregister_client(&sa_client);
2268 	WARN_ON(!xa_empty(&queries));
2269 }
2270