1 /*
2  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
4  * Copyright (c) 2006 Intel Corporation.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 
35 #include <linux/init.h>
36 #include <linux/err.h>
37 #include <linux/random.h>
38 #include <linux/spinlock.h>
39 #include <linux/slab.h>
40 #include <linux/dma-mapping.h>
41 #include <linux/kref.h>
42 #include <linux/xarray.h>
43 #include <linux/workqueue.h>
44 #include <uapi/linux/if_ether.h>
45 #include <rdma/ib_pack.h>
46 #include <rdma/ib_cache.h>
47 #include <rdma/rdma_netlink.h>
48 #include <net/netlink.h>
49 #include <uapi/rdma/ib_user_sa.h>
50 #include <rdma/ib_marshall.h>
51 #include <rdma/ib_addr.h>
52 #include <rdma/opa_addr.h>
53 #include "sa.h"
54 #include "core_priv.h"
55 
/* Bounds and default for the local service timeout, in milliseconds. */
#define IB_SA_LOCAL_SVC_TIMEOUT_MIN		100
#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT		2000
#define IB_SA_LOCAL_SVC_TIMEOUT_MAX		200000

/* Retry limits; presumably for the ClassPortInfo (CPI) query -- confirm. */
#define IB_SA_CPI_MAX_RETRY_CNT			3
#define IB_SA_CPI_RETRY_WAIT			1000 /* msecs */

/*
 * Time, in milliseconds, a request waits for the local service before the
 * timeout work takes over.  Updated by ib_nl_handle_set_timeout().
 */
static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;
62 
63 struct ib_sa_sm_ah {
64 	struct ib_ah        *ah;
65 	struct kref          ref;
66 	u16		     pkey_index;
67 	u8		     src_path_mask;
68 };
69 
/* Which ClassPortInfo layout a struct rdma_class_port_info carries. */
enum rdma_class_port_info_type {
	RDMA_CLASS_PORT_INFO_IB,
	RDMA_CLASS_PORT_INFO_OPA,
};
74 
75 struct rdma_class_port_info {
76 	enum rdma_class_port_info_type type;
77 	union {
78 		struct ib_class_port_info ib;
79 		struct opa_class_port_info opa;
80 	};
81 };
82 
83 struct ib_sa_classport_cache {
84 	bool valid;
85 	int retry_cnt;
86 	struct rdma_class_port_info data;
87 };
88 
89 struct ib_sa_port {
90 	struct ib_mad_agent *agent;
91 	struct ib_sa_sm_ah  *sm_ah;
92 	struct work_struct   update_task;
93 	struct ib_sa_classport_cache classport_info;
94 	struct delayed_work ib_cpi_work;
95 	spinlock_t                   classport_lock; /* protects class port info set */
96 	spinlock_t           ah_lock;
97 	u32		     port_num;
98 };
99 
100 struct ib_sa_device {
101 	int                     start_port, end_port;
102 	struct ib_event_handler event_handler;
103 	struct ib_sa_port port[];
104 };
105 
106 struct ib_sa_query {
107 	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
108 	void (*release)(struct ib_sa_query *);
109 	struct ib_sa_client    *client;
110 	struct ib_sa_port      *port;
111 	struct ib_mad_send_buf *mad_buf;
112 	struct ib_sa_sm_ah     *sm_ah;
113 	int			id;
114 	u32			flags;
115 	struct list_head	list; /* Local svc request list */
116 	u32			seq; /* Local svc request sequence number */
117 	unsigned long		timeout; /* Local svc timeout */
118 	u8			path_use; /* How will the pathrecord be used */
119 };
120 
/* Bits stored in ib_sa_query.flags. */
#define IB_SA_ENABLE_LOCAL_SERVICE	0x00000001
#define IB_SA_CANCEL			0x00000002
#define IB_SA_QUERY_OPA			0x00000004
124 
125 struct ib_sa_path_query {
126 	void (*callback)(int, struct sa_path_rec *, void *);
127 	void *context;
128 	struct ib_sa_query sa_query;
129 	struct sa_path_rec *conv_pr;
130 };
131 
132 struct ib_sa_guidinfo_query {
133 	void (*callback)(int, struct ib_sa_guidinfo_rec *, void *);
134 	void *context;
135 	struct ib_sa_query sa_query;
136 };
137 
138 struct ib_sa_classport_info_query {
139 	void (*callback)(void *);
140 	void *context;
141 	struct ib_sa_query sa_query;
142 };
143 
144 struct ib_sa_mcmember_query {
145 	void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
146 	void *context;
147 	struct ib_sa_query sa_query;
148 };
149 
150 static LIST_HEAD(ib_nl_request_list);
151 static DEFINE_SPINLOCK(ib_nl_request_lock);
152 static atomic_t ib_nl_sa_request_seq;
153 static struct workqueue_struct *ib_nl_wq;
154 static struct delayed_work ib_nl_timed_work;
155 static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = {
156 	[LS_NLA_TYPE_PATH_RECORD]	= {.type = NLA_BINARY,
157 		.len = sizeof(struct ib_path_rec_data)},
158 	[LS_NLA_TYPE_TIMEOUT]		= {.type = NLA_U32},
159 	[LS_NLA_TYPE_SERVICE_ID]	= {.type = NLA_U64},
160 	[LS_NLA_TYPE_DGID]		= {.type = NLA_BINARY,
161 		.len = sizeof(struct rdma_nla_ls_gid)},
162 	[LS_NLA_TYPE_SGID]		= {.type = NLA_BINARY,
163 		.len = sizeof(struct rdma_nla_ls_gid)},
164 	[LS_NLA_TYPE_TCLASS]		= {.type = NLA_U8},
165 	[LS_NLA_TYPE_PKEY]		= {.type = NLA_U16},
166 	[LS_NLA_TYPE_QOS_CLASS]		= {.type = NLA_U16},
167 };
168 
169 
170 static int ib_sa_add_one(struct ib_device *device);
171 static void ib_sa_remove_one(struct ib_device *device, void *client_data);
172 
173 static struct ib_client sa_client = {
174 	.name   = "sa",
175 	.add    = ib_sa_add_one,
176 	.remove = ib_sa_remove_one
177 };
178 
179 static DEFINE_XARRAY_FLAGS(queries, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
180 
181 static DEFINE_SPINLOCK(tid_lock);
182 static u32 tid;
183 
184 #define PATH_REC_FIELD(field) \
185 	.struct_offset_bytes = offsetof(struct sa_path_rec, field),	\
186 	.struct_size_bytes   = sizeof_field(struct sa_path_rec, field),	\
187 	.field_name          = "sa_path_rec:" #field
188 
189 static const struct ib_field path_rec_table[] = {
190 	{ PATH_REC_FIELD(service_id),
191 	  .offset_words = 0,
192 	  .offset_bits  = 0,
193 	  .size_bits    = 64 },
194 	{ PATH_REC_FIELD(dgid),
195 	  .offset_words = 2,
196 	  .offset_bits  = 0,
197 	  .size_bits    = 128 },
198 	{ PATH_REC_FIELD(sgid),
199 	  .offset_words = 6,
200 	  .offset_bits  = 0,
201 	  .size_bits    = 128 },
202 	{ PATH_REC_FIELD(ib.dlid),
203 	  .offset_words = 10,
204 	  .offset_bits  = 0,
205 	  .size_bits    = 16 },
206 	{ PATH_REC_FIELD(ib.slid),
207 	  .offset_words = 10,
208 	  .offset_bits  = 16,
209 	  .size_bits    = 16 },
210 	{ PATH_REC_FIELD(ib.raw_traffic),
211 	  .offset_words = 11,
212 	  .offset_bits  = 0,
213 	  .size_bits    = 1 },
214 	{ RESERVED,
215 	  .offset_words = 11,
216 	  .offset_bits  = 1,
217 	  .size_bits    = 3 },
218 	{ PATH_REC_FIELD(flow_label),
219 	  .offset_words = 11,
220 	  .offset_bits  = 4,
221 	  .size_bits    = 20 },
222 	{ PATH_REC_FIELD(hop_limit),
223 	  .offset_words = 11,
224 	  .offset_bits  = 24,
225 	  .size_bits    = 8 },
226 	{ PATH_REC_FIELD(traffic_class),
227 	  .offset_words = 12,
228 	  .offset_bits  = 0,
229 	  .size_bits    = 8 },
230 	{ PATH_REC_FIELD(reversible),
231 	  .offset_words = 12,
232 	  .offset_bits  = 8,
233 	  .size_bits    = 1 },
234 	{ PATH_REC_FIELD(numb_path),
235 	  .offset_words = 12,
236 	  .offset_bits  = 9,
237 	  .size_bits    = 7 },
238 	{ PATH_REC_FIELD(pkey),
239 	  .offset_words = 12,
240 	  .offset_bits  = 16,
241 	  .size_bits    = 16 },
242 	{ PATH_REC_FIELD(qos_class),
243 	  .offset_words = 13,
244 	  .offset_bits  = 0,
245 	  .size_bits    = 12 },
246 	{ PATH_REC_FIELD(sl),
247 	  .offset_words = 13,
248 	  .offset_bits  = 12,
249 	  .size_bits    = 4 },
250 	{ PATH_REC_FIELD(mtu_selector),
251 	  .offset_words = 13,
252 	  .offset_bits  = 16,
253 	  .size_bits    = 2 },
254 	{ PATH_REC_FIELD(mtu),
255 	  .offset_words = 13,
256 	  .offset_bits  = 18,
257 	  .size_bits    = 6 },
258 	{ PATH_REC_FIELD(rate_selector),
259 	  .offset_words = 13,
260 	  .offset_bits  = 24,
261 	  .size_bits    = 2 },
262 	{ PATH_REC_FIELD(rate),
263 	  .offset_words = 13,
264 	  .offset_bits  = 26,
265 	  .size_bits    = 6 },
266 	{ PATH_REC_FIELD(packet_life_time_selector),
267 	  .offset_words = 14,
268 	  .offset_bits  = 0,
269 	  .size_bits    = 2 },
270 	{ PATH_REC_FIELD(packet_life_time),
271 	  .offset_words = 14,
272 	  .offset_bits  = 2,
273 	  .size_bits    = 6 },
274 	{ PATH_REC_FIELD(preference),
275 	  .offset_words = 14,
276 	  .offset_bits  = 8,
277 	  .size_bits    = 8 },
278 	{ RESERVED,
279 	  .offset_words = 14,
280 	  .offset_bits  = 16,
281 	  .size_bits    = 48 },
282 };
283 
284 #define OPA_PATH_REC_FIELD(field) \
285 	.struct_offset_bytes = \
286 		offsetof(struct sa_path_rec, field), \
287 	.struct_size_bytes   = \
288 		sizeof_field(struct sa_path_rec, field),	\
289 	.field_name          = "sa_path_rec:" #field
290 
291 static const struct ib_field opa_path_rec_table[] = {
292 	{ OPA_PATH_REC_FIELD(service_id),
293 	  .offset_words = 0,
294 	  .offset_bits  = 0,
295 	  .size_bits    = 64 },
296 	{ OPA_PATH_REC_FIELD(dgid),
297 	  .offset_words = 2,
298 	  .offset_bits  = 0,
299 	  .size_bits    = 128 },
300 	{ OPA_PATH_REC_FIELD(sgid),
301 	  .offset_words = 6,
302 	  .offset_bits  = 0,
303 	  .size_bits    = 128 },
304 	{ OPA_PATH_REC_FIELD(opa.dlid),
305 	  .offset_words = 10,
306 	  .offset_bits  = 0,
307 	  .size_bits    = 32 },
308 	{ OPA_PATH_REC_FIELD(opa.slid),
309 	  .offset_words = 11,
310 	  .offset_bits  = 0,
311 	  .size_bits    = 32 },
312 	{ OPA_PATH_REC_FIELD(opa.raw_traffic),
313 	  .offset_words = 12,
314 	  .offset_bits  = 0,
315 	  .size_bits    = 1 },
316 	{ RESERVED,
317 	  .offset_words = 12,
318 	  .offset_bits  = 1,
319 	  .size_bits    = 3 },
320 	{ OPA_PATH_REC_FIELD(flow_label),
321 	  .offset_words = 12,
322 	  .offset_bits  = 4,
323 	  .size_bits    = 20 },
324 	{ OPA_PATH_REC_FIELD(hop_limit),
325 	  .offset_words = 12,
326 	  .offset_bits  = 24,
327 	  .size_bits    = 8 },
328 	{ OPA_PATH_REC_FIELD(traffic_class),
329 	  .offset_words = 13,
330 	  .offset_bits  = 0,
331 	  .size_bits    = 8 },
332 	{ OPA_PATH_REC_FIELD(reversible),
333 	  .offset_words = 13,
334 	  .offset_bits  = 8,
335 	  .size_bits    = 1 },
336 	{ OPA_PATH_REC_FIELD(numb_path),
337 	  .offset_words = 13,
338 	  .offset_bits  = 9,
339 	  .size_bits    = 7 },
340 	{ OPA_PATH_REC_FIELD(pkey),
341 	  .offset_words = 13,
342 	  .offset_bits  = 16,
343 	  .size_bits    = 16 },
344 	{ OPA_PATH_REC_FIELD(opa.l2_8B),
345 	  .offset_words = 14,
346 	  .offset_bits  = 0,
347 	  .size_bits    = 1 },
348 	{ OPA_PATH_REC_FIELD(opa.l2_10B),
349 	  .offset_words = 14,
350 	  .offset_bits  = 1,
351 	  .size_bits    = 1 },
352 	{ OPA_PATH_REC_FIELD(opa.l2_9B),
353 	  .offset_words = 14,
354 	  .offset_bits  = 2,
355 	  .size_bits    = 1 },
356 	{ OPA_PATH_REC_FIELD(opa.l2_16B),
357 	  .offset_words = 14,
358 	  .offset_bits  = 3,
359 	  .size_bits    = 1 },
360 	{ RESERVED,
361 	  .offset_words = 14,
362 	  .offset_bits  = 4,
363 	  .size_bits    = 2 },
364 	{ OPA_PATH_REC_FIELD(opa.qos_type),
365 	  .offset_words = 14,
366 	  .offset_bits  = 6,
367 	  .size_bits    = 2 },
368 	{ OPA_PATH_REC_FIELD(opa.qos_priority),
369 	  .offset_words = 14,
370 	  .offset_bits  = 8,
371 	  .size_bits    = 8 },
372 	{ RESERVED,
373 	  .offset_words = 14,
374 	  .offset_bits  = 16,
375 	  .size_bits    = 3 },
376 	{ OPA_PATH_REC_FIELD(sl),
377 	  .offset_words = 14,
378 	  .offset_bits  = 19,
379 	  .size_bits    = 5 },
380 	{ RESERVED,
381 	  .offset_words = 14,
382 	  .offset_bits  = 24,
383 	  .size_bits    = 8 },
384 	{ OPA_PATH_REC_FIELD(mtu_selector),
385 	  .offset_words = 15,
386 	  .offset_bits  = 0,
387 	  .size_bits    = 2 },
388 	{ OPA_PATH_REC_FIELD(mtu),
389 	  .offset_words = 15,
390 	  .offset_bits  = 2,
391 	  .size_bits    = 6 },
392 	{ OPA_PATH_REC_FIELD(rate_selector),
393 	  .offset_words = 15,
394 	  .offset_bits  = 8,
395 	  .size_bits    = 2 },
396 	{ OPA_PATH_REC_FIELD(rate),
397 	  .offset_words = 15,
398 	  .offset_bits  = 10,
399 	  .size_bits    = 6 },
400 	{ OPA_PATH_REC_FIELD(packet_life_time_selector),
401 	  .offset_words = 15,
402 	  .offset_bits  = 16,
403 	  .size_bits    = 2 },
404 	{ OPA_PATH_REC_FIELD(packet_life_time),
405 	  .offset_words = 15,
406 	  .offset_bits  = 18,
407 	  .size_bits    = 6 },
408 	{ OPA_PATH_REC_FIELD(preference),
409 	  .offset_words = 15,
410 	  .offset_bits  = 24,
411 	  .size_bits    = 8 },
412 };
413 
414 #define MCMEMBER_REC_FIELD(field) \
415 	.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field),	\
416 	.struct_size_bytes   = sizeof_field(struct ib_sa_mcmember_rec, field),	\
417 	.field_name          = "sa_mcmember_rec:" #field
418 
419 static const struct ib_field mcmember_rec_table[] = {
420 	{ MCMEMBER_REC_FIELD(mgid),
421 	  .offset_words = 0,
422 	  .offset_bits  = 0,
423 	  .size_bits    = 128 },
424 	{ MCMEMBER_REC_FIELD(port_gid),
425 	  .offset_words = 4,
426 	  .offset_bits  = 0,
427 	  .size_bits    = 128 },
428 	{ MCMEMBER_REC_FIELD(qkey),
429 	  .offset_words = 8,
430 	  .offset_bits  = 0,
431 	  .size_bits    = 32 },
432 	{ MCMEMBER_REC_FIELD(mlid),
433 	  .offset_words = 9,
434 	  .offset_bits  = 0,
435 	  .size_bits    = 16 },
436 	{ MCMEMBER_REC_FIELD(mtu_selector),
437 	  .offset_words = 9,
438 	  .offset_bits  = 16,
439 	  .size_bits    = 2 },
440 	{ MCMEMBER_REC_FIELD(mtu),
441 	  .offset_words = 9,
442 	  .offset_bits  = 18,
443 	  .size_bits    = 6 },
444 	{ MCMEMBER_REC_FIELD(traffic_class),
445 	  .offset_words = 9,
446 	  .offset_bits  = 24,
447 	  .size_bits    = 8 },
448 	{ MCMEMBER_REC_FIELD(pkey),
449 	  .offset_words = 10,
450 	  .offset_bits  = 0,
451 	  .size_bits    = 16 },
452 	{ MCMEMBER_REC_FIELD(rate_selector),
453 	  .offset_words = 10,
454 	  .offset_bits  = 16,
455 	  .size_bits    = 2 },
456 	{ MCMEMBER_REC_FIELD(rate),
457 	  .offset_words = 10,
458 	  .offset_bits  = 18,
459 	  .size_bits    = 6 },
460 	{ MCMEMBER_REC_FIELD(packet_life_time_selector),
461 	  .offset_words = 10,
462 	  .offset_bits  = 24,
463 	  .size_bits    = 2 },
464 	{ MCMEMBER_REC_FIELD(packet_life_time),
465 	  .offset_words = 10,
466 	  .offset_bits  = 26,
467 	  .size_bits    = 6 },
468 	{ MCMEMBER_REC_FIELD(sl),
469 	  .offset_words = 11,
470 	  .offset_bits  = 0,
471 	  .size_bits    = 4 },
472 	{ MCMEMBER_REC_FIELD(flow_label),
473 	  .offset_words = 11,
474 	  .offset_bits  = 4,
475 	  .size_bits    = 20 },
476 	{ MCMEMBER_REC_FIELD(hop_limit),
477 	  .offset_words = 11,
478 	  .offset_bits  = 24,
479 	  .size_bits    = 8 },
480 	{ MCMEMBER_REC_FIELD(scope),
481 	  .offset_words = 12,
482 	  .offset_bits  = 0,
483 	  .size_bits    = 4 },
484 	{ MCMEMBER_REC_FIELD(join_state),
485 	  .offset_words = 12,
486 	  .offset_bits  = 4,
487 	  .size_bits    = 4 },
488 	{ MCMEMBER_REC_FIELD(proxy_join),
489 	  .offset_words = 12,
490 	  .offset_bits  = 8,
491 	  .size_bits    = 1 },
492 	{ RESERVED,
493 	  .offset_words = 12,
494 	  .offset_bits  = 9,
495 	  .size_bits    = 23 },
496 };
497 
498 #define CLASSPORTINFO_REC_FIELD(field) \
499 	.struct_offset_bytes = offsetof(struct ib_class_port_info, field),	\
500 	.struct_size_bytes   = sizeof_field(struct ib_class_port_info, field),	\
501 	.field_name          = "ib_class_port_info:" #field
502 
503 static const struct ib_field ib_classport_info_rec_table[] = {
504 	{ CLASSPORTINFO_REC_FIELD(base_version),
505 	  .offset_words = 0,
506 	  .offset_bits  = 0,
507 	  .size_bits    = 8 },
508 	{ CLASSPORTINFO_REC_FIELD(class_version),
509 	  .offset_words = 0,
510 	  .offset_bits  = 8,
511 	  .size_bits    = 8 },
512 	{ CLASSPORTINFO_REC_FIELD(capability_mask),
513 	  .offset_words = 0,
514 	  .offset_bits  = 16,
515 	  .size_bits    = 16 },
516 	{ CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
517 	  .offset_words = 1,
518 	  .offset_bits  = 0,
519 	  .size_bits    = 32 },
520 	{ CLASSPORTINFO_REC_FIELD(redirect_gid),
521 	  .offset_words = 2,
522 	  .offset_bits  = 0,
523 	  .size_bits    = 128 },
524 	{ CLASSPORTINFO_REC_FIELD(redirect_tcslfl),
525 	  .offset_words = 6,
526 	  .offset_bits  = 0,
527 	  .size_bits    = 32 },
528 	{ CLASSPORTINFO_REC_FIELD(redirect_lid),
529 	  .offset_words = 7,
530 	  .offset_bits  = 0,
531 	  .size_bits    = 16 },
532 	{ CLASSPORTINFO_REC_FIELD(redirect_pkey),
533 	  .offset_words = 7,
534 	  .offset_bits  = 16,
535 	  .size_bits    = 16 },
536 
537 	{ CLASSPORTINFO_REC_FIELD(redirect_qp),
538 	  .offset_words = 8,
539 	  .offset_bits  = 0,
540 	  .size_bits    = 32 },
541 	{ CLASSPORTINFO_REC_FIELD(redirect_qkey),
542 	  .offset_words = 9,
543 	  .offset_bits  = 0,
544 	  .size_bits    = 32 },
545 
546 	{ CLASSPORTINFO_REC_FIELD(trap_gid),
547 	  .offset_words = 10,
548 	  .offset_bits  = 0,
549 	  .size_bits    = 128 },
550 	{ CLASSPORTINFO_REC_FIELD(trap_tcslfl),
551 	  .offset_words = 14,
552 	  .offset_bits  = 0,
553 	  .size_bits    = 32 },
554 
555 	{ CLASSPORTINFO_REC_FIELD(trap_lid),
556 	  .offset_words = 15,
557 	  .offset_bits  = 0,
558 	  .size_bits    = 16 },
559 	{ CLASSPORTINFO_REC_FIELD(trap_pkey),
560 	  .offset_words = 15,
561 	  .offset_bits  = 16,
562 	  .size_bits    = 16 },
563 
564 	{ CLASSPORTINFO_REC_FIELD(trap_hlqp),
565 	  .offset_words = 16,
566 	  .offset_bits  = 0,
567 	  .size_bits    = 32 },
568 	{ CLASSPORTINFO_REC_FIELD(trap_qkey),
569 	  .offset_words = 17,
570 	  .offset_bits  = 0,
571 	  .size_bits    = 32 },
572 };
573 
574 #define OPA_CLASSPORTINFO_REC_FIELD(field) \
575 	.struct_offset_bytes =\
576 		offsetof(struct opa_class_port_info, field),	\
577 	.struct_size_bytes   = \
578 		sizeof_field(struct opa_class_port_info, field),	\
579 	.field_name          = "opa_class_port_info:" #field
580 
581 static const struct ib_field opa_classport_info_rec_table[] = {
582 	{ OPA_CLASSPORTINFO_REC_FIELD(base_version),
583 	  .offset_words = 0,
584 	  .offset_bits  = 0,
585 	  .size_bits    = 8 },
586 	{ OPA_CLASSPORTINFO_REC_FIELD(class_version),
587 	  .offset_words = 0,
588 	  .offset_bits  = 8,
589 	  .size_bits    = 8 },
590 	{ OPA_CLASSPORTINFO_REC_FIELD(cap_mask),
591 	  .offset_words = 0,
592 	  .offset_bits  = 16,
593 	  .size_bits    = 16 },
594 	{ OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
595 	  .offset_words = 1,
596 	  .offset_bits  = 0,
597 	  .size_bits    = 32 },
598 	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_gid),
599 	  .offset_words = 2,
600 	  .offset_bits  = 0,
601 	  .size_bits    = 128 },
602 	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl),
603 	  .offset_words = 6,
604 	  .offset_bits  = 0,
605 	  .size_bits    = 32 },
606 	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_lid),
607 	  .offset_words = 7,
608 	  .offset_bits  = 0,
609 	  .size_bits    = 32 },
610 	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp),
611 	  .offset_words = 8,
612 	  .offset_bits  = 0,
613 	  .size_bits    = 32 },
614 	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey),
615 	  .offset_words = 9,
616 	  .offset_bits  = 0,
617 	  .size_bits    = 32 },
618 	{ OPA_CLASSPORTINFO_REC_FIELD(trap_gid),
619 	  .offset_words = 10,
620 	  .offset_bits  = 0,
621 	  .size_bits    = 128 },
622 	{ OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl),
623 	  .offset_words = 14,
624 	  .offset_bits  = 0,
625 	  .size_bits    = 32 },
626 	{ OPA_CLASSPORTINFO_REC_FIELD(trap_lid),
627 	  .offset_words = 15,
628 	  .offset_bits  = 0,
629 	  .size_bits    = 32 },
630 	{ OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp),
631 	  .offset_words = 16,
632 	  .offset_bits  = 0,
633 	  .size_bits    = 32 },
634 	{ OPA_CLASSPORTINFO_REC_FIELD(trap_qkey),
635 	  .offset_words = 17,
636 	  .offset_bits  = 0,
637 	  .size_bits    = 32 },
638 	{ OPA_CLASSPORTINFO_REC_FIELD(trap_pkey),
639 	  .offset_words = 18,
640 	  .offset_bits  = 0,
641 	  .size_bits    = 16 },
642 	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey),
643 	  .offset_words = 18,
644 	  .offset_bits  = 16,
645 	  .size_bits    = 16 },
646 	{ OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd),
647 	  .offset_words = 19,
648 	  .offset_bits  = 0,
649 	  .size_bits    = 8 },
650 	{ RESERVED,
651 	  .offset_words = 19,
652 	  .offset_bits  = 8,
653 	  .size_bits    = 24 },
654 };
655 
656 #define GUIDINFO_REC_FIELD(field) \
657 	.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field),	\
658 	.struct_size_bytes   = sizeof_field(struct ib_sa_guidinfo_rec, field),	\
659 	.field_name          = "sa_guidinfo_rec:" #field
660 
661 static const struct ib_field guidinfo_rec_table[] = {
662 	{ GUIDINFO_REC_FIELD(lid),
663 	  .offset_words = 0,
664 	  .offset_bits  = 0,
665 	  .size_bits    = 16 },
666 	{ GUIDINFO_REC_FIELD(block_num),
667 	  .offset_words = 0,
668 	  .offset_bits  = 16,
669 	  .size_bits    = 8 },
670 	{ GUIDINFO_REC_FIELD(res1),
671 	  .offset_words = 0,
672 	  .offset_bits  = 24,
673 	  .size_bits    = 8 },
674 	{ GUIDINFO_REC_FIELD(res2),
675 	  .offset_words = 1,
676 	  .offset_bits  = 0,
677 	  .size_bits    = 32 },
678 	{ GUIDINFO_REC_FIELD(guid_info_list),
679 	  .offset_words = 2,
680 	  .offset_bits  = 0,
681 	  .size_bits    = 512 },
682 };
683 
684 static inline void ib_sa_disable_local_svc(struct ib_sa_query *query)
685 {
686 	query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE;
687 }
688 
689 static inline int ib_sa_query_cancelled(struct ib_sa_query *query)
690 {
691 	return (query->flags & IB_SA_CANCEL);
692 }
693 
694 static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
695 				     struct ib_sa_query *query)
696 {
697 	struct sa_path_rec *sa_rec = query->mad_buf->context[1];
698 	struct ib_sa_mad *mad = query->mad_buf->mad;
699 	ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask;
700 	u16 val16;
701 	u64 val64;
702 	struct rdma_ls_resolve_header *header;
703 
704 	query->mad_buf->context[1] = NULL;
705 
706 	/* Construct the family header first */
707 	header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
708 	strscpy_pad(header->device_name,
709 		    dev_name(&query->port->agent->device->dev),
710 		    LS_DEVICE_NAME_MAX);
711 	header->port_num = query->port->port_num;
712 
713 	if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) &&
714 	    sa_rec->reversible != 0)
715 		query->path_use = LS_RESOLVE_PATH_USE_GMP;
716 	else
717 		query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL;
718 	header->path_use = query->path_use;
719 
720 	/* Now build the attributes */
721 	if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) {
722 		val64 = be64_to_cpu(sa_rec->service_id);
723 		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID,
724 			sizeof(val64), &val64);
725 	}
726 	if (comp_mask & IB_SA_PATH_REC_DGID)
727 		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_DGID,
728 			sizeof(sa_rec->dgid), &sa_rec->dgid);
729 	if (comp_mask & IB_SA_PATH_REC_SGID)
730 		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SGID,
731 			sizeof(sa_rec->sgid), &sa_rec->sgid);
732 	if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
733 		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_TCLASS,
734 			sizeof(sa_rec->traffic_class), &sa_rec->traffic_class);
735 
736 	if (comp_mask & IB_SA_PATH_REC_PKEY) {
737 		val16 = be16_to_cpu(sa_rec->pkey);
738 		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_PKEY,
739 			sizeof(val16), &val16);
740 	}
741 	if (comp_mask & IB_SA_PATH_REC_QOS_CLASS) {
742 		val16 = be16_to_cpu(sa_rec->qos_class);
743 		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_QOS_CLASS,
744 			sizeof(val16), &val16);
745 	}
746 }
747 
748 static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
749 {
750 	int len = 0;
751 
752 	if (comp_mask & IB_SA_PATH_REC_SERVICE_ID)
753 		len += nla_total_size(sizeof(u64));
754 	if (comp_mask & IB_SA_PATH_REC_DGID)
755 		len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
756 	if (comp_mask & IB_SA_PATH_REC_SGID)
757 		len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
758 	if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
759 		len += nla_total_size(sizeof(u8));
760 	if (comp_mask & IB_SA_PATH_REC_PKEY)
761 		len += nla_total_size(sizeof(u16));
762 	if (comp_mask & IB_SA_PATH_REC_QOS_CLASS)
763 		len += nla_total_size(sizeof(u16));
764 
765 	/*
766 	 * Make sure that at least some of the required comp_mask bits are
767 	 * set.
768 	 */
769 	if (WARN_ON(len == 0))
770 		return len;
771 
772 	/* Add the family header */
773 	len += NLMSG_ALIGN(sizeof(struct rdma_ls_resolve_header));
774 
775 	return len;
776 }
777 
778 static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
779 {
780 	struct sk_buff *skb = NULL;
781 	struct nlmsghdr *nlh;
782 	void *data;
783 	struct ib_sa_mad *mad;
784 	int len;
785 	unsigned long flags;
786 	unsigned long delay;
787 	gfp_t gfp_flag;
788 	int ret;
789 
790 	INIT_LIST_HEAD(&query->list);
791 	query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
792 
793 	mad = query->mad_buf->mad;
794 	len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask);
795 	if (len <= 0)
796 		return -EMSGSIZE;
797 
798 	skb = nlmsg_new(len, gfp_mask);
799 	if (!skb)
800 		return -ENOMEM;
801 
802 	/* Put nlmsg header only for now */
803 	data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS,
804 			    RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST);
805 	if (!data) {
806 		nlmsg_free(skb);
807 		return -EMSGSIZE;
808 	}
809 
810 	/* Add attributes */
811 	ib_nl_set_path_rec_attrs(skb, query);
812 
813 	/* Repair the nlmsg header length */
814 	nlmsg_end(skb, nlh);
815 
816 	gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC :
817 		GFP_NOWAIT;
818 
819 	spin_lock_irqsave(&ib_nl_request_lock, flags);
820 	ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_flag);
821 
822 	if (ret)
823 		goto out;
824 
825 	/* Put the request on the list.*/
826 	delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
827 	query->timeout = delay + jiffies;
828 	list_add_tail(&query->list, &ib_nl_request_list);
829 	/* Start the timeout if this is the only request */
830 	if (ib_nl_request_list.next == &query->list)
831 		queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
832 
833 out:
834 	spin_unlock_irqrestore(&ib_nl_request_lock, flags);
835 
836 	return ret;
837 }
838 
839 static int ib_nl_cancel_request(struct ib_sa_query *query)
840 {
841 	unsigned long flags;
842 	struct ib_sa_query *wait_query;
843 	int found = 0;
844 
845 	spin_lock_irqsave(&ib_nl_request_lock, flags);
846 	list_for_each_entry(wait_query, &ib_nl_request_list, list) {
847 		/* Let the timeout to take care of the callback */
848 		if (query == wait_query) {
849 			query->flags |= IB_SA_CANCEL;
850 			query->timeout = jiffies;
851 			list_move(&query->list, &ib_nl_request_list);
852 			found = 1;
853 			mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, 1);
854 			break;
855 		}
856 	}
857 	spin_unlock_irqrestore(&ib_nl_request_lock, flags);
858 
859 	return found;
860 }
861 
862 static void send_handler(struct ib_mad_agent *agent,
863 			 struct ib_mad_send_wc *mad_send_wc);
864 
865 static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query,
866 					   const struct nlmsghdr *nlh)
867 {
868 	struct ib_mad_send_wc mad_send_wc;
869 	struct ib_sa_mad *mad = NULL;
870 	const struct nlattr *head, *curr;
871 	struct ib_path_rec_data  *rec;
872 	int len, rem;
873 	u32 mask = 0;
874 	int status = -EIO;
875 
876 	if (query->callback) {
877 		head = (const struct nlattr *) nlmsg_data(nlh);
878 		len = nlmsg_len(nlh);
879 		switch (query->path_use) {
880 		case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
881 			mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
882 			break;
883 
884 		case LS_RESOLVE_PATH_USE_ALL:
885 		case LS_RESOLVE_PATH_USE_GMP:
886 		default:
887 			mask = IB_PATH_PRIMARY | IB_PATH_GMP |
888 				IB_PATH_BIDIRECTIONAL;
889 			break;
890 		}
891 		nla_for_each_attr(curr, head, len, rem) {
892 			if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) {
893 				rec = nla_data(curr);
894 				/*
895 				 * Get the first one. In the future, we may
896 				 * need to get up to 6 pathrecords.
897 				 */
898 				if ((rec->flags & mask) == mask) {
899 					mad = query->mad_buf->mad;
900 					mad->mad_hdr.method |=
901 						IB_MGMT_METHOD_RESP;
902 					memcpy(mad->data, rec->path_rec,
903 					       sizeof(rec->path_rec));
904 					status = 0;
905 					break;
906 				}
907 			}
908 		}
909 		query->callback(query, status, mad);
910 	}
911 
912 	mad_send_wc.send_buf = query->mad_buf;
913 	mad_send_wc.status = IB_WC_SUCCESS;
914 	send_handler(query->mad_buf->mad_agent, &mad_send_wc);
915 }
916 
917 static void ib_nl_request_timeout(struct work_struct *work)
918 {
919 	unsigned long flags;
920 	struct ib_sa_query *query;
921 	unsigned long delay;
922 	struct ib_mad_send_wc mad_send_wc;
923 	int ret;
924 
925 	spin_lock_irqsave(&ib_nl_request_lock, flags);
926 	while (!list_empty(&ib_nl_request_list)) {
927 		query = list_entry(ib_nl_request_list.next,
928 				   struct ib_sa_query, list);
929 
930 		if (time_after(query->timeout, jiffies)) {
931 			delay = query->timeout - jiffies;
932 			if ((long)delay <= 0)
933 				delay = 1;
934 			queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
935 			break;
936 		}
937 
938 		list_del(&query->list);
939 		ib_sa_disable_local_svc(query);
940 		/* Hold the lock to protect against query cancellation */
941 		if (ib_sa_query_cancelled(query))
942 			ret = -1;
943 		else
944 			ret = ib_post_send_mad(query->mad_buf, NULL);
945 		if (ret) {
946 			mad_send_wc.send_buf = query->mad_buf;
947 			mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
948 			spin_unlock_irqrestore(&ib_nl_request_lock, flags);
949 			send_handler(query->port->agent, &mad_send_wc);
950 			spin_lock_irqsave(&ib_nl_request_lock, flags);
951 		}
952 	}
953 	spin_unlock_irqrestore(&ib_nl_request_lock, flags);
954 }
955 
956 int ib_nl_handle_set_timeout(struct sk_buff *skb,
957 			     struct nlmsghdr *nlh,
958 			     struct netlink_ext_ack *extack)
959 {
960 	int timeout, delta, abs_delta;
961 	const struct nlattr *attr;
962 	unsigned long flags;
963 	struct ib_sa_query *query;
964 	long delay = 0;
965 	struct nlattr *tb[LS_NLA_TYPE_MAX];
966 	int ret;
967 
968 	if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
969 	    !(NETLINK_CB(skb).sk))
970 		return -EPERM;
971 
972 	ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
973 				   nlmsg_len(nlh), ib_nl_policy, NULL);
974 	attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT];
975 	if (ret || !attr)
976 		goto settimeout_out;
977 
978 	timeout = *(int *) nla_data(attr);
979 	if (timeout < IB_SA_LOCAL_SVC_TIMEOUT_MIN)
980 		timeout = IB_SA_LOCAL_SVC_TIMEOUT_MIN;
981 	if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX)
982 		timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX;
983 
984 	delta = timeout - sa_local_svc_timeout_ms;
985 	if (delta < 0)
986 		abs_delta = -delta;
987 	else
988 		abs_delta = delta;
989 
990 	if (delta != 0) {
991 		spin_lock_irqsave(&ib_nl_request_lock, flags);
992 		sa_local_svc_timeout_ms = timeout;
993 		list_for_each_entry(query, &ib_nl_request_list, list) {
994 			if (delta < 0 && abs_delta > query->timeout)
995 				query->timeout = 0;
996 			else
997 				query->timeout += delta;
998 
999 			/* Get the new delay from the first entry */
1000 			if (!delay) {
1001 				delay = query->timeout - jiffies;
1002 				if (delay <= 0)
1003 					delay = 1;
1004 			}
1005 		}
1006 		if (delay)
1007 			mod_delayed_work(ib_nl_wq, &ib_nl_timed_work,
1008 					 (unsigned long)delay);
1009 		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1010 	}
1011 
1012 settimeout_out:
1013 	return 0;
1014 }
1015 
1016 static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
1017 {
1018 	struct nlattr *tb[LS_NLA_TYPE_MAX];
1019 	int ret;
1020 
1021 	if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
1022 		return 0;
1023 
1024 	ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
1025 				   nlmsg_len(nlh), ib_nl_policy, NULL);
1026 	if (ret)
1027 		return 0;
1028 
1029 	return 1;
1030 }
1031 
1032 int ib_nl_handle_resolve_resp(struct sk_buff *skb,
1033 			      struct nlmsghdr *nlh,
1034 			      struct netlink_ext_ack *extack)
1035 {
1036 	unsigned long flags;
1037 	struct ib_sa_query *query = NULL, *iter;
1038 	struct ib_mad_send_buf *send_buf;
1039 	struct ib_mad_send_wc mad_send_wc;
1040 	int ret;
1041 
1042 	if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
1043 	    !(NETLINK_CB(skb).sk))
1044 		return -EPERM;
1045 
1046 	spin_lock_irqsave(&ib_nl_request_lock, flags);
1047 	list_for_each_entry(iter, &ib_nl_request_list, list) {
1048 		/*
1049 		 * If the query is cancelled, let the timeout routine
1050 		 * take care of it.
1051 		 */
1052 		if (nlh->nlmsg_seq == iter->seq) {
1053 			if (!ib_sa_query_cancelled(iter)) {
1054 				list_del(&iter->list);
1055 				query = iter;
1056 			}
1057 			break;
1058 		}
1059 	}
1060 
1061 	if (!query) {
1062 		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1063 		goto resp_out;
1064 	}
1065 
1066 	send_buf = query->mad_buf;
1067 
1068 	if (!ib_nl_is_good_resolve_resp(nlh)) {
1069 		/* if the result is a failure, send out the packet via IB */
1070 		ib_sa_disable_local_svc(query);
1071 		ret = ib_post_send_mad(query->mad_buf, NULL);
1072 		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1073 		if (ret) {
1074 			mad_send_wc.send_buf = send_buf;
1075 			mad_send_wc.status = IB_WC_GENERAL_ERR;
1076 			send_handler(query->port->agent, &mad_send_wc);
1077 		}
1078 	} else {
1079 		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1080 		ib_nl_process_good_resolve_rsp(query, nlh);
1081 	}
1082 
1083 resp_out:
1084 	return 0;
1085 }
1086 
1087 static void free_sm_ah(struct kref *kref)
1088 {
1089 	struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
1090 
1091 	rdma_destroy_ah(sm_ah->ah, 0);
1092 	kfree(sm_ah);
1093 }
1094 
1095 void ib_sa_register_client(struct ib_sa_client *client)
1096 {
1097 	atomic_set(&client->users, 1);
1098 	init_completion(&client->comp);
1099 }
1100 EXPORT_SYMBOL(ib_sa_register_client);
1101 
/**
 * ib_sa_unregister_client - tear down an SA client
 * @client: client to unregister
 *
 * Drops one reference on the client and then blocks until client->comp is
 * completed.  Presumably ib_sa_client_put() (defined elsewhere) completes it
 * once the last user is gone -- confirm there.
 */
void ib_sa_unregister_client(struct ib_sa_client *client)
{
	ib_sa_client_put(client);
	wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(ib_sa_unregister_client);
1108 
1109 /**
1110  * ib_sa_cancel_query - try to cancel an SA query
1111  * @id:ID of query to cancel
1112  * @query:query pointer to cancel
1113  *
1114  * Try to cancel an SA query.  If the id and query don't match up or
1115  * the query has already completed, nothing is done.  Otherwise the
1116  * query is canceled and will complete with a status of -EINTR.
1117  */
1118 void ib_sa_cancel_query(int id, struct ib_sa_query *query)
1119 {
1120 	unsigned long flags;
1121 	struct ib_mad_send_buf *mad_buf;
1122 
1123 	xa_lock_irqsave(&queries, flags);
1124 	if (xa_load(&queries, id) != query) {
1125 		xa_unlock_irqrestore(&queries, flags);
1126 		return;
1127 	}
1128 	mad_buf = query->mad_buf;
1129 	xa_unlock_irqrestore(&queries, flags);
1130 
1131 	/*
1132 	 * If the query is still on the netlink request list, schedule
1133 	 * it to be cancelled by the timeout routine. Otherwise, it has been
1134 	 * sent to the MAD layer and has to be cancelled from there.
1135 	 */
1136 	if (!ib_nl_cancel_request(query))
1137 		ib_cancel_mad(mad_buf);
1138 }
1139 EXPORT_SYMBOL(ib_sa_cancel_query);
1140 
1141 static u8 get_src_path_mask(struct ib_device *device, u32 port_num)
1142 {
1143 	struct ib_sa_device *sa_dev;
1144 	struct ib_sa_port   *port;
1145 	unsigned long flags;
1146 	u8 src_path_mask;
1147 
1148 	sa_dev = ib_get_client_data(device, &sa_client);
1149 	if (!sa_dev)
1150 		return 0x7f;
1151 
1152 	port  = &sa_dev->port[port_num - sa_dev->start_port];
1153 	spin_lock_irqsave(&port->ah_lock, flags);
1154 	src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
1155 	spin_unlock_irqrestore(&port->ah_lock, flags);
1156 
1157 	return src_path_mask;
1158 }
1159 
1160 static int init_ah_attr_grh_fields(struct ib_device *device, u32 port_num,
1161 				   struct sa_path_rec *rec,
1162 				   struct rdma_ah_attr *ah_attr,
1163 				   const struct ib_gid_attr *gid_attr)
1164 {
1165 	enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
1166 
1167 	if (!gid_attr) {
1168 		gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
1169 						 port_num, NULL);
1170 		if (IS_ERR(gid_attr))
1171 			return PTR_ERR(gid_attr);
1172 	} else
1173 		rdma_hold_gid_attr(gid_attr);
1174 
1175 	rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
1176 				be32_to_cpu(rec->flow_label),
1177 				rec->hop_limit,	rec->traffic_class,
1178 				gid_attr);
1179 	return 0;
1180 }
1181 
1182 /**
1183  * ib_init_ah_attr_from_path - Initialize address handle attributes based on
1184  *   an SA path record.
1185  * @device: Device associated ah attributes initialization.
1186  * @port_num: Port on the specified device.
1187  * @rec: path record entry to use for ah attributes initialization.
1188  * @ah_attr: address handle attributes to initialization from path record.
1189  * @gid_attr: SGID attribute to consider during initialization.
1190  *
1191  * When ib_init_ah_attr_from_path() returns success,
1192  * (a) for IB link layer it optionally contains a reference to SGID attribute
1193  * when GRH is present for IB link layer.
1194  * (b) for RoCE link layer it contains a reference to SGID attribute.
1195  * User must invoke rdma_destroy_ah_attr() to release reference to SGID
1196  * attributes which are initialized using ib_init_ah_attr_from_path().
1197  */
1198 int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num,
1199 			      struct sa_path_rec *rec,
1200 			      struct rdma_ah_attr *ah_attr,
1201 			      const struct ib_gid_attr *gid_attr)
1202 {
1203 	int ret = 0;
1204 
1205 	memset(ah_attr, 0, sizeof(*ah_attr));
1206 	ah_attr->type = rdma_ah_find_type(device, port_num);
1207 	rdma_ah_set_sl(ah_attr, rec->sl);
1208 	rdma_ah_set_port_num(ah_attr, port_num);
1209 	rdma_ah_set_static_rate(ah_attr, rec->rate);
1210 
1211 	if (sa_path_is_roce(rec)) {
1212 		ret = roce_resolve_route_from_path(rec, gid_attr);
1213 		if (ret)
1214 			return ret;
1215 
1216 		memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN);
1217 	} else {
1218 		rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
1219 		if (sa_path_is_opa(rec) &&
1220 		    rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))
1221 			rdma_ah_set_make_grd(ah_attr, true);
1222 
1223 		rdma_ah_set_path_bits(ah_attr,
1224 				      be32_to_cpu(sa_path_get_slid(rec)) &
1225 				      get_src_path_mask(device, port_num));
1226 	}
1227 
1228 	if (rec->hop_limit > 0 || sa_path_is_roce(rec))
1229 		ret = init_ah_attr_grh_fields(device, port_num,
1230 					      rec, ah_attr, gid_attr);
1231 	return ret;
1232 }
1233 EXPORT_SYMBOL(ib_init_ah_attr_from_path);
1234 
1235 static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
1236 {
1237 	struct rdma_ah_attr ah_attr;
1238 	unsigned long flags;
1239 
1240 	spin_lock_irqsave(&query->port->ah_lock, flags);
1241 	if (!query->port->sm_ah) {
1242 		spin_unlock_irqrestore(&query->port->ah_lock, flags);
1243 		return -EAGAIN;
1244 	}
1245 	kref_get(&query->port->sm_ah->ref);
1246 	query->sm_ah = query->port->sm_ah;
1247 	spin_unlock_irqrestore(&query->port->ah_lock, flags);
1248 
1249 	/*
1250 	 * Always check if sm_ah has valid dlid assigned,
1251 	 * before querying for class port info
1252 	 */
1253 	if ((rdma_query_ah(query->sm_ah->ah, &ah_attr) < 0) ||
1254 	    !rdma_is_valid_unicast_lid(&ah_attr)) {
1255 		kref_put(&query->sm_ah->ref, free_sm_ah);
1256 		return -EAGAIN;
1257 	}
1258 	query->mad_buf = ib_create_send_mad(query->port->agent, 1,
1259 					    query->sm_ah->pkey_index,
1260 					    0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
1261 					    gfp_mask,
1262 					    ((query->flags & IB_SA_QUERY_OPA) ?
1263 					     OPA_MGMT_BASE_VERSION :
1264 					     IB_MGMT_BASE_VERSION));
1265 	if (IS_ERR(query->mad_buf)) {
1266 		kref_put(&query->sm_ah->ref, free_sm_ah);
1267 		return -ENOMEM;
1268 	}
1269 
1270 	query->mad_buf->ah = query->sm_ah->ah;
1271 
1272 	return 0;
1273 }
1274 
/*
 * Undo alloc_mad(): free the query's send MAD buffer, then drop the
 * reference the query holds on its SM address handle.
 */
static void free_mad(struct ib_sa_query *query)
{
	ib_free_send_mad(query->mad_buf);
	kref_put(&query->sm_ah->ref, free_sm_ah);
}
1280 
1281 static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
1282 {
1283 	struct ib_sa_mad *mad = query->mad_buf->mad;
1284 	unsigned long flags;
1285 
1286 	memset(mad, 0, sizeof *mad);
1287 
1288 	if (query->flags & IB_SA_QUERY_OPA) {
1289 		mad->mad_hdr.base_version  = OPA_MGMT_BASE_VERSION;
1290 		mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION;
1291 	} else {
1292 		mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
1293 		mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
1294 	}
1295 	mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_SUBN_ADM;
1296 	spin_lock_irqsave(&tid_lock, flags);
1297 	mad->mad_hdr.tid           =
1298 		cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
1299 	spin_unlock_irqrestore(&tid_lock, flags);
1300 }
1301 
1302 static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
1303 		    gfp_t gfp_mask)
1304 {
1305 	unsigned long flags;
1306 	int ret, id;
1307 	const int nmbr_sa_query_retries = 10;
1308 
1309 	xa_lock_irqsave(&queries, flags);
1310 	ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask);
1311 	xa_unlock_irqrestore(&queries, flags);
1312 	if (ret < 0)
1313 		return ret;
1314 
1315 	query->mad_buf->timeout_ms  = timeout_ms / nmbr_sa_query_retries;
1316 	query->mad_buf->retries = nmbr_sa_query_retries;
1317 	if (!query->mad_buf->timeout_ms) {
1318 		/* Special case, very small timeout_ms */
1319 		query->mad_buf->timeout_ms = 1;
1320 		query->mad_buf->retries = timeout_ms;
1321 	}
1322 	query->mad_buf->context[0] = query;
1323 	query->id = id;
1324 
1325 	if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
1326 	    (!(query->flags & IB_SA_QUERY_OPA))) {
1327 		if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
1328 			if (!ib_nl_make_request(query, gfp_mask))
1329 				return id;
1330 		}
1331 		ib_sa_disable_local_svc(query);
1332 	}
1333 
1334 	ret = ib_post_send_mad(query->mad_buf, NULL);
1335 	if (ret) {
1336 		xa_lock_irqsave(&queries, flags);
1337 		__xa_erase(&queries, id);
1338 		xa_unlock_irqrestore(&queries, flags);
1339 	}
1340 
1341 	/*
1342 	 * It's not safe to dereference query any more, because the
1343 	 * send may already have completed and freed the query in
1344 	 * another context.
1345 	 */
1346 	return ret ? ret : id;
1347 }
1348 
/**
 * ib_sa_unpack_path - unpack a path record from its packed form
 * @attribute: packed buffer to read from
 * @rec: path record structure to fill in
 *
 * Unpacks @attribute into @rec using the path_rec_table field layout.
 */
void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec)
{
	ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
}
EXPORT_SYMBOL(ib_sa_unpack_path);
1354 
/**
 * ib_sa_pack_path - pack a path record into its packed form
 * @rec: path record structure to read from
 * @attribute: buffer that receives the packed record
 *
 * Packs @rec into @attribute using the path_rec_table field layout.
 */
void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
{
	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
}
EXPORT_SYMBOL(ib_sa_pack_path);
1360 
1361 static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
1362 					 struct ib_sa_device *sa_dev,
1363 					 u32 port_num)
1364 {
1365 	struct ib_sa_port *port;
1366 	unsigned long flags;
1367 	bool ret = false;
1368 
1369 	port = &sa_dev->port[port_num - sa_dev->start_port];
1370 	spin_lock_irqsave(&port->classport_lock, flags);
1371 	if (!port->classport_info.valid)
1372 		goto ret;
1373 
1374 	if (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_OPA)
1375 		ret = opa_get_cpi_capmask2(&port->classport_info.data.opa) &
1376 			OPA_CLASS_PORT_INFO_PR_SUPPORT;
1377 ret:
1378 	spin_unlock_irqrestore(&port->classport_lock, flags);
1379 	return ret;
1380 }
1381 
/* Result of checking which kind of path record query a port can issue */
enum opa_pr_supported {
	PR_NOT_SUPPORTED,	/* no path record query is possible */
	PR_OPA_SUPPORTED,	/* an OPA path record query is possible */
	PR_IB_SUPPORTED		/* an IB path record query is possible */
};
1387 
1388 /*
1389  * opa_pr_query_possible - Check if current PR query can be an OPA query.
1390  *
1391  * Retuns PR_NOT_SUPPORTED if a path record query is not
1392  * possible, PR_OPA_SUPPORTED if an OPA path record query
1393  * is possible and PR_IB_SUPPORTED if an IB path record
1394  * query is possible.
1395  */
1396 static int opa_pr_query_possible(struct ib_sa_client *client,
1397 				 struct ib_sa_device *sa_dev,
1398 				 struct ib_device *device, u32 port_num)
1399 {
1400 	struct ib_port_attr port_attr;
1401 
1402 	if (ib_query_port(device, port_num, &port_attr))
1403 		return PR_NOT_SUPPORTED;
1404 
1405 	if (ib_sa_opa_pathrecord_support(client, sa_dev, port_num))
1406 		return PR_OPA_SUPPORTED;
1407 
1408 	if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
1409 		return PR_NOT_SUPPORTED;
1410 	else
1411 		return PR_IB_SUPPORTED;
1412 }
1413 
1414 static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
1415 				    int status,
1416 				    struct ib_sa_mad *mad)
1417 {
1418 	struct ib_sa_path_query *query =
1419 		container_of(sa_query, struct ib_sa_path_query, sa_query);
1420 
1421 	if (mad) {
1422 		struct sa_path_rec rec;
1423 
1424 		if (sa_query->flags & IB_SA_QUERY_OPA) {
1425 			ib_unpack(opa_path_rec_table,
1426 				  ARRAY_SIZE(opa_path_rec_table),
1427 				  mad->data, &rec);
1428 			rec.rec_type = SA_PATH_REC_TYPE_OPA;
1429 			query->callback(status, &rec, query->context);
1430 		} else {
1431 			ib_unpack(path_rec_table,
1432 				  ARRAY_SIZE(path_rec_table),
1433 				  mad->data, &rec);
1434 			rec.rec_type = SA_PATH_REC_TYPE_IB;
1435 			sa_path_set_dmac_zero(&rec);
1436 
1437 			if (query->conv_pr) {
1438 				struct sa_path_rec opa;
1439 
1440 				memset(&opa, 0, sizeof(struct sa_path_rec));
1441 				sa_convert_path_ib_to_opa(&opa, &rec);
1442 				query->callback(status, &opa, query->context);
1443 			} else {
1444 				query->callback(status, &rec, query->context);
1445 			}
1446 		}
1447 	} else
1448 		query->callback(status, NULL, query->context);
1449 }
1450 
1451 static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
1452 {
1453 	struct ib_sa_path_query *query =
1454 		container_of(sa_query, struct ib_sa_path_query, sa_query);
1455 
1456 	kfree(query->conv_pr);
1457 	kfree(query);
1458 }
1459 
1460 /**
1461  * ib_sa_path_rec_get - Start a Path get query
1462  * @client:SA client
1463  * @device:device to send query on
1464  * @port_num: port number to send query on
1465  * @rec:Path Record to send in query
1466  * @comp_mask:component mask to send in query
1467  * @timeout_ms:time to wait for response
1468  * @gfp_mask:GFP mask to use for internal allocations
1469  * @callback:function called when query completes, times out or is
1470  * canceled
1471  * @context:opaque user context passed to callback
1472  * @sa_query:query context, used to cancel query
1473  *
1474  * Send a Path Record Get query to the SA to look up a path.  The
1475  * callback function will be called when the query completes (or
1476  * fails); status is 0 for a successful response, -EINTR if the query
1477  * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
1478  * occurred sending the query.  The resp parameter of the callback is
1479  * only valid if status is 0.
1480  *
1481  * If the return value of ib_sa_path_rec_get() is negative, it is an
1482  * error code.  Otherwise it is a query ID that can be used to cancel
1483  * the query.
1484  */
1485 int ib_sa_path_rec_get(struct ib_sa_client *client,
1486 		       struct ib_device *device, u32 port_num,
1487 		       struct sa_path_rec *rec,
1488 		       ib_sa_comp_mask comp_mask,
1489 		       unsigned long timeout_ms, gfp_t gfp_mask,
1490 		       void (*callback)(int status,
1491 					struct sa_path_rec *resp,
1492 					void *context),
1493 		       void *context,
1494 		       struct ib_sa_query **sa_query)
1495 {
1496 	struct ib_sa_path_query *query;
1497 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1498 	struct ib_sa_port   *port;
1499 	struct ib_mad_agent *agent;
1500 	struct ib_sa_mad *mad;
1501 	enum opa_pr_supported status;
1502 	int ret;
1503 
1504 	if (!sa_dev)
1505 		return -ENODEV;
1506 
1507 	if ((rec->rec_type != SA_PATH_REC_TYPE_IB) &&
1508 	    (rec->rec_type != SA_PATH_REC_TYPE_OPA))
1509 		return -EINVAL;
1510 
1511 	port  = &sa_dev->port[port_num - sa_dev->start_port];
1512 	agent = port->agent;
1513 
1514 	query = kzalloc(sizeof(*query), gfp_mask);
1515 	if (!query)
1516 		return -ENOMEM;
1517 
1518 	query->sa_query.port     = port;
1519 	if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
1520 		status = opa_pr_query_possible(client, sa_dev, device, port_num);
1521 		if (status == PR_NOT_SUPPORTED) {
1522 			ret = -EINVAL;
1523 			goto err1;
1524 		} else if (status == PR_OPA_SUPPORTED) {
1525 			query->sa_query.flags |= IB_SA_QUERY_OPA;
1526 		} else {
1527 			query->conv_pr =
1528 				kmalloc(sizeof(*query->conv_pr), gfp_mask);
1529 			if (!query->conv_pr) {
1530 				ret = -ENOMEM;
1531 				goto err1;
1532 			}
1533 		}
1534 	}
1535 
1536 	ret = alloc_mad(&query->sa_query, gfp_mask);
1537 	if (ret)
1538 		goto err2;
1539 
1540 	ib_sa_client_get(client);
1541 	query->sa_query.client = client;
1542 	query->callback        = callback;
1543 	query->context         = context;
1544 
1545 	mad = query->sa_query.mad_buf->mad;
1546 	init_mad(&query->sa_query, agent);
1547 
1548 	query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
1549 	query->sa_query.release  = ib_sa_path_rec_release;
1550 	mad->mad_hdr.method	 = IB_MGMT_METHOD_GET;
1551 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_PATH_REC);
1552 	mad->sa_hdr.comp_mask	 = comp_mask;
1553 
1554 	if (query->sa_query.flags & IB_SA_QUERY_OPA) {
1555 		ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
1556 			rec, mad->data);
1557 	} else if (query->conv_pr) {
1558 		sa_convert_path_opa_to_ib(query->conv_pr, rec);
1559 		ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
1560 			query->conv_pr, mad->data);
1561 	} else {
1562 		ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
1563 			rec, mad->data);
1564 	}
1565 
1566 	*sa_query = &query->sa_query;
1567 
1568 	query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE;
1569 	query->sa_query.mad_buf->context[1] = (query->conv_pr) ?
1570 						query->conv_pr : rec;
1571 
1572 	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1573 	if (ret < 0)
1574 		goto err3;
1575 
1576 	return ret;
1577 
1578 err3:
1579 	*sa_query = NULL;
1580 	ib_sa_client_put(query->sa_query.client);
1581 	free_mad(&query->sa_query);
1582 err2:
1583 	kfree(query->conv_pr);
1584 err1:
1585 	kfree(query);
1586 	return ret;
1587 }
1588 EXPORT_SYMBOL(ib_sa_path_rec_get);
1589 
1590 static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
1591 					int status,
1592 					struct ib_sa_mad *mad)
1593 {
1594 	struct ib_sa_mcmember_query *query =
1595 		container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
1596 
1597 	if (mad) {
1598 		struct ib_sa_mcmember_rec rec;
1599 
1600 		ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
1601 			  mad->data, &rec);
1602 		query->callback(status, &rec, query->context);
1603 	} else
1604 		query->callback(status, NULL, query->context);
1605 }
1606 
1607 static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
1608 {
1609 	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
1610 }
1611 
1612 int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
1613 			     struct ib_device *device, u32 port_num,
1614 			     u8 method,
1615 			     struct ib_sa_mcmember_rec *rec,
1616 			     ib_sa_comp_mask comp_mask,
1617 			     unsigned long timeout_ms, gfp_t gfp_mask,
1618 			     void (*callback)(int status,
1619 					      struct ib_sa_mcmember_rec *resp,
1620 					      void *context),
1621 			     void *context,
1622 			     struct ib_sa_query **sa_query)
1623 {
1624 	struct ib_sa_mcmember_query *query;
1625 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1626 	struct ib_sa_port   *port;
1627 	struct ib_mad_agent *agent;
1628 	struct ib_sa_mad *mad;
1629 	int ret;
1630 
1631 	if (!sa_dev)
1632 		return -ENODEV;
1633 
1634 	port  = &sa_dev->port[port_num - sa_dev->start_port];
1635 	agent = port->agent;
1636 
1637 	query = kzalloc(sizeof(*query), gfp_mask);
1638 	if (!query)
1639 		return -ENOMEM;
1640 
1641 	query->sa_query.port     = port;
1642 	ret = alloc_mad(&query->sa_query, gfp_mask);
1643 	if (ret)
1644 		goto err1;
1645 
1646 	ib_sa_client_get(client);
1647 	query->sa_query.client = client;
1648 	query->callback        = callback;
1649 	query->context         = context;
1650 
1651 	mad = query->sa_query.mad_buf->mad;
1652 	init_mad(&query->sa_query, agent);
1653 
1654 	query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
1655 	query->sa_query.release  = ib_sa_mcmember_rec_release;
1656 	mad->mad_hdr.method	 = method;
1657 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
1658 	mad->sa_hdr.comp_mask	 = comp_mask;
1659 
1660 	ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
1661 		rec, mad->data);
1662 
1663 	*sa_query = &query->sa_query;
1664 
1665 	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1666 	if (ret < 0)
1667 		goto err2;
1668 
1669 	return ret;
1670 
1671 err2:
1672 	*sa_query = NULL;
1673 	ib_sa_client_put(query->sa_query.client);
1674 	free_mad(&query->sa_query);
1675 
1676 err1:
1677 	kfree(query);
1678 	return ret;
1679 }
1680 
1681 /* Support GuidInfoRecord */
1682 static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
1683 					int status,
1684 					struct ib_sa_mad *mad)
1685 {
1686 	struct ib_sa_guidinfo_query *query =
1687 		container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
1688 
1689 	if (mad) {
1690 		struct ib_sa_guidinfo_rec rec;
1691 
1692 		ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table),
1693 			  mad->data, &rec);
1694 		query->callback(status, &rec, query->context);
1695 	} else
1696 		query->callback(status, NULL, query->context);
1697 }
1698 
1699 static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
1700 {
1701 	kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query));
1702 }
1703 
1704 int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
1705 			      struct ib_device *device, u32 port_num,
1706 			      struct ib_sa_guidinfo_rec *rec,
1707 			      ib_sa_comp_mask comp_mask, u8 method,
1708 			      unsigned long timeout_ms, gfp_t gfp_mask,
1709 			      void (*callback)(int status,
1710 					       struct ib_sa_guidinfo_rec *resp,
1711 					       void *context),
1712 			      void *context,
1713 			      struct ib_sa_query **sa_query)
1714 {
1715 	struct ib_sa_guidinfo_query *query;
1716 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1717 	struct ib_sa_port *port;
1718 	struct ib_mad_agent *agent;
1719 	struct ib_sa_mad *mad;
1720 	int ret;
1721 
1722 	if (!sa_dev)
1723 		return -ENODEV;
1724 
1725 	if (method != IB_MGMT_METHOD_GET &&
1726 	    method != IB_MGMT_METHOD_SET &&
1727 	    method != IB_SA_METHOD_DELETE) {
1728 		return -EINVAL;
1729 	}
1730 
1731 	port  = &sa_dev->port[port_num - sa_dev->start_port];
1732 	agent = port->agent;
1733 
1734 	query = kzalloc(sizeof(*query), gfp_mask);
1735 	if (!query)
1736 		return -ENOMEM;
1737 
1738 	query->sa_query.port = port;
1739 	ret = alloc_mad(&query->sa_query, gfp_mask);
1740 	if (ret)
1741 		goto err1;
1742 
1743 	ib_sa_client_get(client);
1744 	query->sa_query.client = client;
1745 	query->callback        = callback;
1746 	query->context         = context;
1747 
1748 	mad = query->sa_query.mad_buf->mad;
1749 	init_mad(&query->sa_query, agent);
1750 
1751 	query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
1752 	query->sa_query.release  = ib_sa_guidinfo_rec_release;
1753 
1754 	mad->mad_hdr.method	 = method;
1755 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC);
1756 	mad->sa_hdr.comp_mask	 = comp_mask;
1757 
1758 	ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec,
1759 		mad->data);
1760 
1761 	*sa_query = &query->sa_query;
1762 
1763 	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1764 	if (ret < 0)
1765 		goto err2;
1766 
1767 	return ret;
1768 
1769 err2:
1770 	*sa_query = NULL;
1771 	ib_sa_client_put(query->sa_query.client);
1772 	free_mad(&query->sa_query);
1773 
1774 err1:
1775 	kfree(query);
1776 	return ret;
1777 }
1778 EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
1779 
/* Context handed to the class port info query issued by update_ib_cpi() */
struct ib_classport_info_context {
	/* completed by ib_classportinfo_cb() when the query finishes */
	struct completion	done;
	/* receives the query pointer from ib_sa_classport_info_rec_query() */
	struct ib_sa_query	*sa_query;
};
1784 
1785 static void ib_classportinfo_cb(void *context)
1786 {
1787 	struct ib_classport_info_context *cb_ctx = context;
1788 
1789 	complete(&cb_ctx->done);
1790 }
1791 
1792 static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
1793 					      int status,
1794 					      struct ib_sa_mad *mad)
1795 {
1796 	unsigned long flags;
1797 	struct ib_sa_classport_info_query *query =
1798 		container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
1799 	struct ib_sa_classport_cache *info = &sa_query->port->classport_info;
1800 
1801 	if (mad) {
1802 		if (sa_query->flags & IB_SA_QUERY_OPA) {
1803 			struct opa_class_port_info rec;
1804 
1805 			ib_unpack(opa_classport_info_rec_table,
1806 				  ARRAY_SIZE(opa_classport_info_rec_table),
1807 				  mad->data, &rec);
1808 
1809 			spin_lock_irqsave(&sa_query->port->classport_lock,
1810 					  flags);
1811 			if (!status && !info->valid) {
1812 				memcpy(&info->data.opa, &rec,
1813 				       sizeof(info->data.opa));
1814 
1815 				info->valid = true;
1816 				info->data.type = RDMA_CLASS_PORT_INFO_OPA;
1817 			}
1818 			spin_unlock_irqrestore(&sa_query->port->classport_lock,
1819 					       flags);
1820 
1821 		} else {
1822 			struct ib_class_port_info rec;
1823 
1824 			ib_unpack(ib_classport_info_rec_table,
1825 				  ARRAY_SIZE(ib_classport_info_rec_table),
1826 				  mad->data, &rec);
1827 
1828 			spin_lock_irqsave(&sa_query->port->classport_lock,
1829 					  flags);
1830 			if (!status && !info->valid) {
1831 				memcpy(&info->data.ib, &rec,
1832 				       sizeof(info->data.ib));
1833 
1834 				info->valid = true;
1835 				info->data.type = RDMA_CLASS_PORT_INFO_IB;
1836 			}
1837 			spin_unlock_irqrestore(&sa_query->port->classport_lock,
1838 					       flags);
1839 		}
1840 	}
1841 	query->callback(query->context);
1842 }
1843 
1844 static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
1845 {
1846 	kfree(container_of(sa_query, struct ib_sa_classport_info_query,
1847 			   sa_query));
1848 }
1849 
1850 static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
1851 					  unsigned long timeout_ms,
1852 					  void (*callback)(void *context),
1853 					  void *context,
1854 					  struct ib_sa_query **sa_query)
1855 {
1856 	struct ib_mad_agent *agent;
1857 	struct ib_sa_classport_info_query *query;
1858 	struct ib_sa_mad *mad;
1859 	gfp_t gfp_mask = GFP_KERNEL;
1860 	int ret;
1861 
1862 	agent = port->agent;
1863 
1864 	query = kzalloc(sizeof(*query), gfp_mask);
1865 	if (!query)
1866 		return -ENOMEM;
1867 
1868 	query->sa_query.port = port;
1869 	query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device,
1870 						 port->port_num) ?
1871 				 IB_SA_QUERY_OPA : 0;
1872 	ret = alloc_mad(&query->sa_query, gfp_mask);
1873 	if (ret)
1874 		goto err_free;
1875 
1876 	query->callback = callback;
1877 	query->context = context;
1878 
1879 	mad = query->sa_query.mad_buf->mad;
1880 	init_mad(&query->sa_query, agent);
1881 
1882 	query->sa_query.callback = ib_sa_classport_info_rec_callback;
1883 	query->sa_query.release  = ib_sa_classport_info_rec_release;
1884 	mad->mad_hdr.method	 = IB_MGMT_METHOD_GET;
1885 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
1886 	mad->sa_hdr.comp_mask	 = 0;
1887 	*sa_query = &query->sa_query;
1888 
1889 	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1890 	if (ret < 0)
1891 		goto err_free_mad;
1892 
1893 	return ret;
1894 
1895 err_free_mad:
1896 	*sa_query = NULL;
1897 	free_mad(&query->sa_query);
1898 
1899 err_free:
1900 	kfree(query);
1901 	return ret;
1902 }
1903 
1904 static void update_ib_cpi(struct work_struct *work)
1905 {
1906 	struct ib_sa_port *port =
1907 		container_of(work, struct ib_sa_port, ib_cpi_work.work);
1908 	struct ib_classport_info_context *cb_context;
1909 	unsigned long flags;
1910 	int ret;
1911 
1912 	/* If the classport info is valid, nothing
1913 	 * to do here.
1914 	 */
1915 	spin_lock_irqsave(&port->classport_lock, flags);
1916 	if (port->classport_info.valid) {
1917 		spin_unlock_irqrestore(&port->classport_lock, flags);
1918 		return;
1919 	}
1920 	spin_unlock_irqrestore(&port->classport_lock, flags);
1921 
1922 	cb_context = kmalloc(sizeof(*cb_context), GFP_KERNEL);
1923 	if (!cb_context)
1924 		goto err_nomem;
1925 
1926 	init_completion(&cb_context->done);
1927 
1928 	ret = ib_sa_classport_info_rec_query(port, 3000,
1929 					     ib_classportinfo_cb, cb_context,
1930 					     &cb_context->sa_query);
1931 	if (ret < 0)
1932 		goto free_cb_err;
1933 	wait_for_completion(&cb_context->done);
1934 free_cb_err:
1935 	kfree(cb_context);
1936 	spin_lock_irqsave(&port->classport_lock, flags);
1937 
1938 	/* If the classport info is still not valid, the query should have
1939 	 * failed for some reason. Retry issuing the query
1940 	 */
1941 	if (!port->classport_info.valid) {
1942 		port->classport_info.retry_cnt++;
1943 		if (port->classport_info.retry_cnt <=
1944 		    IB_SA_CPI_MAX_RETRY_CNT) {
1945 			unsigned long delay =
1946 				msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
1947 
1948 			queue_delayed_work(ib_wq, &port->ib_cpi_work, delay);
1949 		}
1950 	}
1951 	spin_unlock_irqrestore(&port->classport_lock, flags);
1952 
1953 err_nomem:
1954 	return;
1955 }
1956 
1957 static void send_handler(struct ib_mad_agent *agent,
1958 			 struct ib_mad_send_wc *mad_send_wc)
1959 {
1960 	struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
1961 	unsigned long flags;
1962 
1963 	if (query->callback)
1964 		switch (mad_send_wc->status) {
1965 		case IB_WC_SUCCESS:
1966 			/* No callback -- already got recv */
1967 			break;
1968 		case IB_WC_RESP_TIMEOUT_ERR:
1969 			query->callback(query, -ETIMEDOUT, NULL);
1970 			break;
1971 		case IB_WC_WR_FLUSH_ERR:
1972 			query->callback(query, -EINTR, NULL);
1973 			break;
1974 		default:
1975 			query->callback(query, -EIO, NULL);
1976 			break;
1977 		}
1978 
1979 	xa_lock_irqsave(&queries, flags);
1980 	__xa_erase(&queries, query->id);
1981 	xa_unlock_irqrestore(&queries, flags);
1982 
1983 	free_mad(query);
1984 	if (query->client)
1985 		ib_sa_client_put(query->client);
1986 	query->release(query);
1987 }
1988 
1989 static void recv_handler(struct ib_mad_agent *mad_agent,
1990 			 struct ib_mad_send_buf *send_buf,
1991 			 struct ib_mad_recv_wc *mad_recv_wc)
1992 {
1993 	struct ib_sa_query *query;
1994 
1995 	if (!send_buf)
1996 		return;
1997 
1998 	query = send_buf->context[0];
1999 	if (query->callback) {
2000 		if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
2001 			query->callback(query,
2002 					mad_recv_wc->recv_buf.mad->mad_hdr.status ?
2003 					-EINVAL : 0,
2004 					(struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
2005 		else
2006 			query->callback(query, -EIO, NULL);
2007 	}
2008 
2009 	ib_free_recv_mad(mad_recv_wc);
2010 }
2011 
2012 static void update_sm_ah(struct work_struct *work)
2013 {
2014 	struct ib_sa_port *port =
2015 		container_of(work, struct ib_sa_port, update_task);
2016 	struct ib_sa_sm_ah *new_ah;
2017 	struct ib_port_attr port_attr;
2018 	struct rdma_ah_attr   ah_attr;
2019 	bool grh_required;
2020 
2021 	if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
2022 		pr_warn("Couldn't query port\n");
2023 		return;
2024 	}
2025 
2026 	new_ah = kmalloc(sizeof(*new_ah), GFP_KERNEL);
2027 	if (!new_ah)
2028 		return;
2029 
2030 	kref_init(&new_ah->ref);
2031 	new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
2032 
2033 	new_ah->pkey_index = 0;
2034 	if (ib_find_pkey(port->agent->device, port->port_num,
2035 			 IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
2036 		pr_err("Couldn't find index for default PKey\n");
2037 
2038 	memset(&ah_attr, 0, sizeof(ah_attr));
2039 	ah_attr.type = rdma_ah_find_type(port->agent->device,
2040 					 port->port_num);
2041 	rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
2042 	rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
2043 	rdma_ah_set_port_num(&ah_attr, port->port_num);
2044 
2045 	grh_required = rdma_is_grh_required(port->agent->device,
2046 					    port->port_num);
2047 
2048 	/*
2049 	 * The OPA sm_lid of 0xFFFF needs special handling so that it can be
2050 	 * differentiated from a permissive LID of 0xFFFF.  We set the
2051 	 * grh_required flag here so the SA can program the DGID in the
2052 	 * address handle appropriately
2053 	 */
2054 	if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA &&
2055 	    (grh_required ||
2056 	     port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)))
2057 		rdma_ah_set_make_grd(&ah_attr, true);
2058 
2059 	if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) {
2060 		rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
2061 		rdma_ah_set_subnet_prefix(&ah_attr,
2062 					  cpu_to_be64(port_attr.subnet_prefix));
2063 		rdma_ah_set_interface_id(&ah_attr,
2064 					 cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
2065 	}
2066 
2067 	new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr,
2068 				    RDMA_CREATE_AH_SLEEPABLE);
2069 	if (IS_ERR(new_ah->ah)) {
2070 		pr_warn("Couldn't create new SM AH\n");
2071 		kfree(new_ah);
2072 		return;
2073 	}
2074 
2075 	spin_lock_irq(&port->ah_lock);
2076 	if (port->sm_ah)
2077 		kref_put(&port->sm_ah->ref, free_sm_ah);
2078 	port->sm_ah = new_ah;
2079 	spin_unlock_irq(&port->ah_lock);
2080 }
2081 
2082 static void ib_sa_event(struct ib_event_handler *handler,
2083 			struct ib_event *event)
2084 {
2085 	if (event->event == IB_EVENT_PORT_ERR    ||
2086 	    event->event == IB_EVENT_PORT_ACTIVE ||
2087 	    event->event == IB_EVENT_LID_CHANGE  ||
2088 	    event->event == IB_EVENT_PKEY_CHANGE ||
2089 	    event->event == IB_EVENT_SM_CHANGE   ||
2090 	    event->event == IB_EVENT_CLIENT_REREGISTER) {
2091 		unsigned long flags;
2092 		struct ib_sa_device *sa_dev =
2093 			container_of(handler, typeof(*sa_dev), event_handler);
2094 		u32 port_num = event->element.port_num - sa_dev->start_port;
2095 		struct ib_sa_port *port = &sa_dev->port[port_num];
2096 
2097 		if (!rdma_cap_ib_sa(handler->device, port->port_num))
2098 			return;
2099 
2100 		spin_lock_irqsave(&port->ah_lock, flags);
2101 		if (port->sm_ah)
2102 			kref_put(&port->sm_ah->ref, free_sm_ah);
2103 		port->sm_ah = NULL;
2104 		spin_unlock_irqrestore(&port->ah_lock, flags);
2105 
2106 		if (event->event == IB_EVENT_SM_CHANGE ||
2107 		    event->event == IB_EVENT_CLIENT_REREGISTER ||
2108 		    event->event == IB_EVENT_LID_CHANGE ||
2109 		    event->event == IB_EVENT_PORT_ACTIVE) {
2110 			unsigned long delay =
2111 				msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
2112 
2113 			spin_lock_irqsave(&port->classport_lock, flags);
2114 			port->classport_info.valid = false;
2115 			port->classport_info.retry_cnt = 0;
2116 			spin_unlock_irqrestore(&port->classport_lock, flags);
2117 			queue_delayed_work(ib_wq,
2118 					   &port->ib_cpi_work, delay);
2119 		}
2120 		queue_work(ib_wq, &sa_dev->port[port_num].update_task);
2121 	}
2122 }
2123 
2124 static int ib_sa_add_one(struct ib_device *device)
2125 {
2126 	struct ib_sa_device *sa_dev;
2127 	int s, e, i;
2128 	int count = 0;
2129 	int ret;
2130 
2131 	s = rdma_start_port(device);
2132 	e = rdma_end_port(device);
2133 
2134 	sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL);
2135 	if (!sa_dev)
2136 		return -ENOMEM;
2137 
2138 	sa_dev->start_port = s;
2139 	sa_dev->end_port   = e;
2140 
2141 	for (i = 0; i <= e - s; ++i) {
2142 		spin_lock_init(&sa_dev->port[i].ah_lock);
2143 		if (!rdma_cap_ib_sa(device, i + 1))
2144 			continue;
2145 
2146 		sa_dev->port[i].sm_ah    = NULL;
2147 		sa_dev->port[i].port_num = i + s;
2148 
2149 		spin_lock_init(&sa_dev->port[i].classport_lock);
2150 		sa_dev->port[i].classport_info.valid = false;
2151 
2152 		sa_dev->port[i].agent =
2153 			ib_register_mad_agent(device, i + s, IB_QPT_GSI,
2154 					      NULL, 0, send_handler,
2155 					      recv_handler, sa_dev, 0);
2156 		if (IS_ERR(sa_dev->port[i].agent)) {
2157 			ret = PTR_ERR(sa_dev->port[i].agent);
2158 			goto err;
2159 		}
2160 
2161 		INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
2162 		INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
2163 				  update_ib_cpi);
2164 
2165 		count++;
2166 	}
2167 
2168 	if (!count) {
2169 		ret = -EOPNOTSUPP;
2170 		goto free;
2171 	}
2172 
2173 	ib_set_client_data(device, &sa_client, sa_dev);
2174 
2175 	/*
2176 	 * We register our event handler after everything is set up,
2177 	 * and then update our cached info after the event handler is
2178 	 * registered to avoid any problems if a port changes state
2179 	 * during our initialization.
2180 	 */
2181 
2182 	INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
2183 	ib_register_event_handler(&sa_dev->event_handler);
2184 
2185 	for (i = 0; i <= e - s; ++i) {
2186 		if (rdma_cap_ib_sa(device, i + 1))
2187 			update_sm_ah(&sa_dev->port[i].update_task);
2188 	}
2189 
2190 	return 0;
2191 
2192 err:
2193 	while (--i >= 0) {
2194 		if (rdma_cap_ib_sa(device, i + 1))
2195 			ib_unregister_mad_agent(sa_dev->port[i].agent);
2196 	}
2197 free:
2198 	kfree(sa_dev);
2199 	return ret;
2200 }
2201 
2202 static void ib_sa_remove_one(struct ib_device *device, void *client_data)
2203 {
2204 	struct ib_sa_device *sa_dev = client_data;
2205 	int i;
2206 
2207 	ib_unregister_event_handler(&sa_dev->event_handler);
2208 	flush_workqueue(ib_wq);
2209 
2210 	for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
2211 		if (rdma_cap_ib_sa(device, i + 1)) {
2212 			cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work);
2213 			ib_unregister_mad_agent(sa_dev->port[i].agent);
2214 			if (sa_dev->port[i].sm_ah)
2215 				kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
2216 		}
2217 
2218 	}
2219 
2220 	kfree(sa_dev);
2221 }
2222 
2223 int ib_sa_init(void)
2224 {
2225 	int ret;
2226 
2227 	get_random_bytes(&tid, sizeof tid);
2228 
2229 	atomic_set(&ib_nl_sa_request_seq, 0);
2230 
2231 	ret = ib_register_client(&sa_client);
2232 	if (ret) {
2233 		pr_err("Couldn't register ib_sa client\n");
2234 		goto err1;
2235 	}
2236 
2237 	ret = mcast_init();
2238 	if (ret) {
2239 		pr_err("Couldn't initialize multicast handling\n");
2240 		goto err2;
2241 	}
2242 
2243 	ib_nl_wq = alloc_ordered_workqueue("ib_nl_sa_wq", WQ_MEM_RECLAIM);
2244 	if (!ib_nl_wq) {
2245 		ret = -ENOMEM;
2246 		goto err3;
2247 	}
2248 
2249 	INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout);
2250 
2251 	return 0;
2252 
2253 err3:
2254 	mcast_cleanup();
2255 err2:
2256 	ib_unregister_client(&sa_client);
2257 err1:
2258 	return ret;
2259 }
2260 
/*
 * ib_sa_cleanup - undo ib_sa_init().
 *
 * Cancels the netlink timeout work, destroys the netlink workqueue, shuts
 * down multicast handling and unregisters the SA client, then warns if any
 * entry is still present in the "queries" xarray.
 */
void ib_sa_cleanup(void)
{
	/* Non-sync cancel; destroying the workqueue follows immediately. */
	cancel_delayed_work(&ib_nl_timed_work);
	destroy_workqueue(ib_nl_wq);
	mcast_cleanup();
	ib_unregister_client(&sa_client);
	/* Every query should have been erased by now. */
	WARN_ON(!xa_empty(&queries));
}
2269