xref: /openbmc/linux/drivers/infiniband/core/cache.c (revision 4cff79e9)
1 /*
2  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Intel Corporation. All rights reserved.
4  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
5  * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35 
36 #include <linux/module.h>
37 #include <linux/errno.h>
38 #include <linux/slab.h>
39 #include <linux/workqueue.h>
40 #include <linux/netdevice.h>
41 #include <net/addrconf.h>
42 
43 #include <rdma/ib_cache.h>
44 
45 #include "core_priv.h"
46 
47 struct ib_pkey_cache {
48 	int             table_len;
49 	u16             table[0];
50 };
51 
52 struct ib_update_work {
53 	struct work_struct work;
54 	struct ib_device  *device;
55 	u8                 port_num;
56 	bool		   enforce_security;
57 };
58 
59 union ib_gid zgid;
60 EXPORT_SYMBOL(zgid);
61 
62 enum gid_attr_find_mask {
63 	GID_ATTR_FIND_MASK_GID          = 1UL << 0,
64 	GID_ATTR_FIND_MASK_NETDEV	= 1UL << 1,
65 	GID_ATTR_FIND_MASK_DEFAULT	= 1UL << 2,
66 	GID_ATTR_FIND_MASK_GID_TYPE	= 1UL << 3,
67 };
68 
69 enum gid_table_entry_props {
70 	GID_TABLE_ENTRY_INVALID		= 1UL << 0,
71 	GID_TABLE_ENTRY_DEFAULT		= 1UL << 1,
72 };
73 
74 struct ib_gid_table_entry {
75 	unsigned long	    props;
76 	union ib_gid        gid;
77 	struct ib_gid_attr  attr;
78 	void		   *context;
79 };
80 
81 struct ib_gid_table {
82 	int                  sz;
83 	/* In RoCE, adding a GID to the table requires:
84 	 * (a) Find out whether this GID already exists.
85 	 * (b) Find a free slot.
86 	 * (c) Write the new GID.
87 	 *
88 	 * Deletion requires a different set of operations:
89 	 * (a) Find the GID.
90 	 * (b) Delete it.
91 	 *
92 	 */
93 	/* Any writer to data_vec must hold this lock and the write side of
94 	 * rwlock. Readers must hold only rwlock. All writers must be in a
95 	 * sleepable context.
96 	 */
97 	struct mutex         lock;
98 	/* rwlock protects data_vec[ix]->props. */
99 	rwlock_t	     rwlock;
100 	struct ib_gid_table_entry *data_vec;
101 };
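/* The locking rules above amount to the following rough pattern; this is an
 * illustrative sketch only (error handling and bookkeeping omitted), not a
 * verbatim excerpt of the helpers below. A writer updating an entry does:
 *
 *	mutex_lock(&table->lock);
 *	// modify data_vec[ix].gid and data_vec[ix].attr
 *	write_lock_irq(&table->rwlock);
 *	// flip data_vec[ix].props bits (e.g. GID_TABLE_ENTRY_INVALID)
 *	write_unlock_irq(&table->rwlock);
 *	mutex_unlock(&table->lock);
 *
 * while a reader only needs:
 *
 *	read_lock_irqsave(&table->rwlock, flags);
 *	// inspect data_vec[ix]
 *	read_unlock_irqrestore(&table->rwlock, flags);
 */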
102 
103 static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
104 {
105 	struct ib_event event;
106 
107 	event.device		= ib_dev;
108 	event.element.port_num	= port;
109 	event.event		= IB_EVENT_GID_CHANGE;
110 
111 	ib_dispatch_event(&event);
112 }
113 
114 static const char * const gid_type_str[] = {
115 	[IB_GID_TYPE_IB]	= "IB/RoCE v1",
116 	[IB_GID_TYPE_ROCE_UDP_ENCAP]	= "RoCE v2",
117 };
118 
119 const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
120 {
121 	if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
122 		return gid_type_str[gid_type];
123 
124 	return "Invalid GID type";
125 }
126 EXPORT_SYMBOL(ib_cache_gid_type_str);
127 
128 int ib_cache_gid_parse_type_str(const char *buf)
129 {
130 	unsigned int i;
131 	size_t len;
132 	int err = -EINVAL;
133 
134 	len = strlen(buf);
135 	if (len == 0)
136 		return -EINVAL;
137 
138 	if (buf[len - 1] == '\n')
139 		len--;
140 
141 	for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
142 		if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
143 		    len == strlen(gid_type_str[i])) {
144 			err = i;
145 			break;
146 		}
147 
148 	return err;
149 }
150 EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
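/* A minimal sketch of how the two helpers above pair up when parsing, say, a
 * sysfs write; the buffer content here is hypothetical:
 *
 *	int gid_type = ib_cache_gid_parse_type_str("RoCE v2\n");
 *
 *	if (gid_type < 0)
 *		return gid_type;	// -EINVAL on an unknown string
 *	pr_debug("selected GID type %s\n", ib_cache_gid_type_str(gid_type));
 */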
151 
152 static void del_roce_gid(struct ib_device *device, u8 port_num,
153 			 struct ib_gid_table *table, int ix)
154 {
155 	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
156 		 device->name, port_num, ix,
157 		 table->data_vec[ix].gid.raw);
158 
159 	if (rdma_cap_roce_gid_table(device, port_num))
160 		device->del_gid(&table->data_vec[ix].attr,
161 				&table->data_vec[ix].context);
162 	dev_put(table->data_vec[ix].attr.ndev);
163 }
164 
165 static int add_roce_gid(struct ib_gid_table *table,
166 			const union ib_gid *gid,
167 			const struct ib_gid_attr *attr)
168 {
169 	struct ib_gid_table_entry *entry;
170 	int ix = attr->index;
171 	int ret = 0;
172 
173 	if (!attr->ndev) {
174 		pr_err("%s NULL netdev device=%s port=%d index=%d\n",
175 		       __func__, attr->device->name, attr->port_num,
176 		       attr->index);
177 		return -EINVAL;
178 	}
179 
180 	entry = &table->data_vec[ix];
181 	if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) {
182 		WARN(1, "GID table corruption device=%s port=%d index=%d\n",
183 		     attr->device->name, attr->port_num,
184 		     attr->index);
185 		return -EINVAL;
186 	}
187 
188 	if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
189 		ret = attr->device->add_gid(gid, attr, &entry->context);
190 		if (ret) {
191 			pr_err("%s GID add failed device=%s port=%d index=%d\n",
192 			       __func__, attr->device->name, attr->port_num,
193 			       attr->index);
194 			goto add_err;
195 		}
196 	}
197 	dev_hold(attr->ndev);
198 
199 add_err:
200 	if (!ret)
201 		pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
202 			 attr->device->name, attr->port_num, ix, gid->raw);
203 	return ret;
204 }
205 
206 /**
207  * add_modify_gid - Add or modify GID table entry
208  *
209  * @table:	GID table in which GID to be added or modified
210  * @gid:	GID content
211  * @attr:	Attributes of the GID
212  *
213  * Returns 0 on success or an appropriate error code. It accepts zero
214  * GID addition for non-RoCE ports from HCAs that report them as valid
215  * GIDs. However, such zero GIDs are not added to the cache.
216  */
217 static int add_modify_gid(struct ib_gid_table *table,
218 			  const union ib_gid *gid,
219 			  const struct ib_gid_attr *attr)
220 {
221 	int ret;
222 
223 	if (rdma_protocol_roce(attr->device, attr->port_num)) {
224 		ret = add_roce_gid(table, gid, attr);
225 		if (ret)
226 			return ret;
227 	} else {
228 		/*
229 		 * Some HCAs report multiple GID entries with only one
230 		 * valid GID; the remaining entries are zero GIDs.
231 		 * Tolerate such behavior for the IB link layer and don't
232 		 * fail the call, but don't add such entries to the GID cache.
233 		 */
234 		if (!memcmp(gid, &zgid, sizeof(*gid)))
235 			return 0;
236 	}
237 
238 	lockdep_assert_held(&table->lock);
239 	memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
240 	memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));
241 
242 	write_lock_irq(&table->rwlock);
243 	table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
244 	write_unlock_irq(&table->rwlock);
245 	return 0;
246 }
247 
248 /**
249  * del_gid - Delete GID table entry
250  *
251  * @ib_dev:	IB device whose GID entry is to be deleted
252  * @port:	Port number of the IB device
253  * @table:	GID table of the IB device for a port
254  * @ix:		GID entry index to delete
255  *
256  */
257 static void del_gid(struct ib_device *ib_dev, u8 port,
258 		    struct ib_gid_table *table, int ix)
259 {
260 	lockdep_assert_held(&table->lock);
261 	write_lock_irq(&table->rwlock);
262 	table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
263 	write_unlock_irq(&table->rwlock);
264 
265 	if (rdma_protocol_roce(ib_dev, port))
266 		del_roce_gid(ib_dev, port, table, ix);
267 	memcpy(&table->data_vec[ix].gid, &zgid, sizeof(zgid));
268 	memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
269 	table->data_vec[ix].context = NULL;
270 }
271 
272 /* rwlock should be read-locked, or the table mutex should be held */
273 static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
274 		    const struct ib_gid_attr *val, bool default_gid,
275 		    unsigned long mask, int *pempty)
276 {
277 	int i = 0;
278 	int found = -1;
279 	int empty = pempty ? -1 : 0;
280 
281 	while (i < table->sz && (found < 0 || empty < 0)) {
282 		struct ib_gid_table_entry *data = &table->data_vec[i];
283 		struct ib_gid_attr *attr = &data->attr;
284 		int curr_index = i;
285 
286 		i++;
287 
288 		/* find_gid() is used during GID addition, where it is expected
289 		 * to return a free entry slot that is not a duplicate.
290 		 * A free slot is requested and returned only if pempty is set,
291 		 * so look up a free slot only when requested.
292 		 */
293 		if (pempty && empty < 0) {
294 			if (data->props & GID_TABLE_ENTRY_INVALID &&
295 			    (default_gid ==
296 			     !!(data->props & GID_TABLE_ENTRY_DEFAULT))) {
297 				/*
298 				 * Found an invalid (free) entry; allocate it.
299 				 * If default GID is requested, then our
300 				 * found slot must be one of the DEFAULT
301 				 * reserved slots or we fail.
302 				 * This ensures that only DEFAULT reserved
303 				 * slots are used for default property GIDs.
304 				 */
305 				empty = curr_index;
306 			}
307 		}
308 
309 		/*
310 		 * Additionally, find_gid() is used to find a valid entry during
311 		 * a lookup operation, where validity needs to be checked. The
312 		 * free-slot search above has already considered this entry, so
313 		 * an INVALID entry is simply skipped here.
314 		 */
315 		if (data->props & GID_TABLE_ENTRY_INVALID)
316 			continue;
317 
318 		if (found >= 0)
319 			continue;
320 
321 		if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
322 		    attr->gid_type != val->gid_type)
323 			continue;
324 
325 		if (mask & GID_ATTR_FIND_MASK_GID &&
326 		    memcmp(gid, &data->gid, sizeof(*gid)))
327 			continue;
328 
329 		if (mask & GID_ATTR_FIND_MASK_NETDEV &&
330 		    attr->ndev != val->ndev)
331 			continue;
332 
333 		if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
334 		    !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
335 		    default_gid)
336 			continue;
337 
338 		found = curr_index;
339 	}
340 
341 	if (pempty)
342 		*pempty = empty;
343 
344 	return found;
345 }
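/* Typical find_gid() invocations, sketched from the callers below (the table
 * mutex or the read side of rwlock is assumed to be held as required):
 *
 *	// duplicate check plus free-slot search during GID addition
 *	ix = find_gid(table, gid, attr, false,
 *		      GID_ATTR_FIND_MASK_GID |
 *		      GID_ATTR_FIND_MASK_GID_TYPE |
 *		      GID_ATTR_FIND_MASK_NETDEV, &empty);
 *
 *	// pure lookup, no free-slot search
 *	ix = find_gid(table, gid, val, false, mask, NULL);
 */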
346 
347 static void make_default_gid(struct net_device *dev, union ib_gid *gid)
348 {
349 	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
350 	addrconf_ifid_eui48(&gid->raw[8], dev);
351 }
352 
353 static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
354 			      union ib_gid *gid, struct ib_gid_attr *attr,
355 			      unsigned long mask, bool default_gid)
356 {
357 	struct ib_gid_table *table;
358 	int ret = 0;
359 	int empty;
360 	int ix;
361 
362 	/* Do not allow adding a zero GID, in keeping with
363 	 * IB spec version 1.3 section 4.1.1 point (6),
364 	 * section 12.7.10 and section 12.7.20.
365 	 */
366 	if (!memcmp(gid, &zgid, sizeof(*gid)))
367 		return -EINVAL;
368 
369 	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
370 
371 	mutex_lock(&table->lock);
372 
373 	ix = find_gid(table, gid, attr, default_gid, mask, &empty);
374 	if (ix >= 0)
375 		goto out_unlock;
376 
377 	if (empty < 0) {
378 		ret = -ENOSPC;
379 		goto out_unlock;
380 	}
381 	attr->device = ib_dev;
382 	attr->index = empty;
383 	attr->port_num = port;
384 	ret = add_modify_gid(table, gid, attr);
385 	if (!ret)
386 		dispatch_gid_change_event(ib_dev, port);
387 
388 out_unlock:
389 	mutex_unlock(&table->lock);
390 	if (ret)
391 		pr_warn("%s: unable to add gid %pI6 error=%d\n",
392 			__func__, gid->raw, ret);
393 	return ret;
394 }
395 
396 int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
397 		     union ib_gid *gid, struct ib_gid_attr *attr)
398 {
399 	struct net_device *idev;
400 	unsigned long mask;
401 	int ret;
402 
403 	if (ib_dev->get_netdev) {
404 		idev = ib_dev->get_netdev(ib_dev, port);
405 		if (idev && attr->ndev != idev) {
406 			union ib_gid default_gid;
407 
408 			/* Adding default GIDs is not permitted */
409 			make_default_gid(idev, &default_gid);
410 			if (!memcmp(gid, &default_gid, sizeof(*gid))) {
411 				dev_put(idev);
412 				return -EPERM;
413 			}
414 		}
415 		if (idev)
416 			dev_put(idev);
417 	}
418 
419 	mask = GID_ATTR_FIND_MASK_GID |
420 	       GID_ATTR_FIND_MASK_GID_TYPE |
421 	       GID_ATTR_FIND_MASK_NETDEV;
422 
423 	ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
424 	return ret;
425 }
426 
427 static int
428 _ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
429 		  union ib_gid *gid, struct ib_gid_attr *attr,
430 		  unsigned long mask, bool default_gid)
431 {
432 	struct ib_gid_table *table;
433 	int ret = 0;
434 	int ix;
435 
436 	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
437 
438 	mutex_lock(&table->lock);
439 
440 	ix = find_gid(table, gid, attr, default_gid, mask, NULL);
441 	if (ix < 0) {
442 		ret = -EINVAL;
443 		goto out_unlock;
444 	}
445 
446 	del_gid(ib_dev, port, table, ix);
447 	dispatch_gid_change_event(ib_dev, port);
448 
449 out_unlock:
450 	mutex_unlock(&table->lock);
451 	if (ret)
452 		pr_debug("%s: can't delete gid %pI6 error=%d\n",
453 			 __func__, gid->raw, ret);
454 	return ret;
455 }
456 
457 int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
458 		     union ib_gid *gid, struct ib_gid_attr *attr)
459 {
460 	unsigned long mask = GID_ATTR_FIND_MASK_GID	  |
461 			     GID_ATTR_FIND_MASK_GID_TYPE |
462 			     GID_ATTR_FIND_MASK_DEFAULT  |
463 			     GID_ATTR_FIND_MASK_NETDEV;
464 
465 	return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false);
466 }
467 
468 int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
469 				     struct net_device *ndev)
470 {
471 	struct ib_gid_table *table;
472 	int ix;
473 	bool deleted = false;
474 
475 	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
476 
477 	mutex_lock(&table->lock);
478 
479 	for (ix = 0; ix < table->sz; ix++) {
480 		if (table->data_vec[ix].attr.ndev == ndev) {
481 			del_gid(ib_dev, port, table, ix);
482 			deleted = true;
483 		}
484 	}
485 
486 	mutex_unlock(&table->lock);
487 
488 	if (deleted)
489 		dispatch_gid_change_event(ib_dev, port);
490 
491 	return 0;
492 }
493 
494 static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
495 			      union ib_gid *gid, struct ib_gid_attr *attr)
496 {
497 	struct ib_gid_table *table;
498 
499 	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
500 
501 	if (index < 0 || index >= table->sz)
502 		return -EINVAL;
503 
504 	if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
505 		return -EAGAIN;
506 
507 	memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
508 	if (attr) {
509 		memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
510 		if (attr->ndev)
511 			dev_hold(attr->ndev);
512 	}
513 
514 	return 0;
515 }
516 
517 static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
518 				    const union ib_gid *gid,
519 				    const struct ib_gid_attr *val,
520 				    unsigned long mask,
521 				    u8 *port, u16 *index)
522 {
523 	struct ib_gid_table *table;
524 	u8 p;
525 	int local_index;
526 	unsigned long flags;
527 
528 	for (p = 0; p < ib_dev->phys_port_cnt; p++) {
529 		table = ib_dev->cache.ports[p].gid;
530 		read_lock_irqsave(&table->rwlock, flags);
531 		local_index = find_gid(table, gid, val, false, mask, NULL);
532 		if (local_index >= 0) {
533 			if (index)
534 				*index = local_index;
535 			if (port)
536 				*port = p + rdma_start_port(ib_dev);
537 			read_unlock_irqrestore(&table->rwlock, flags);
538 			return 0;
539 		}
540 		read_unlock_irqrestore(&table->rwlock, flags);
541 	}
542 
543 	return -ENOENT;
544 }
545 
546 static int ib_cache_gid_find(struct ib_device *ib_dev,
547 			     const union ib_gid *gid,
548 			     enum ib_gid_type gid_type,
549 			     struct net_device *ndev, u8 *port,
550 			     u16 *index)
551 {
552 	unsigned long mask = GID_ATTR_FIND_MASK_GID |
553 			     GID_ATTR_FIND_MASK_GID_TYPE;
554 	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
555 
556 	if (ndev)
557 		mask |= GID_ATTR_FIND_MASK_NETDEV;
558 
559 	return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
560 					mask, port, index);
561 }
562 
563 /**
564  * ib_find_cached_gid_by_port - Returns the GID table index where a specified
565  * GID value occurs. It searches for the specified GID value in the local
566  * software cache.
567  * @ib_dev: The device to query.
568  * @gid: The GID value to search for.
569  * @gid_type: The GID type to search for.
570  * @port: The port number of the device where the GID value should be
571  *   searched.
572  * @ndev: In RoCE, the net device of the device. NULL means ignore.
573  * @index: The index into the cached GID table where the GID was found. This
574  *   parameter may be NULL.
575  */
576 int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
577 			       const union ib_gid *gid,
578 			       enum ib_gid_type gid_type,
579 			       u8 port, struct net_device *ndev,
580 			       u16 *index)
581 {
582 	int local_index;
583 	struct ib_gid_table *table;
584 	unsigned long mask = GID_ATTR_FIND_MASK_GID |
585 			     GID_ATTR_FIND_MASK_GID_TYPE;
586 	struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
587 	unsigned long flags;
588 
589 	if (!rdma_is_port_valid(ib_dev, port))
590 		return -ENOENT;
591 
592 	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
593 
594 	if (ndev)
595 		mask |= GID_ATTR_FIND_MASK_NETDEV;
596 
597 	read_lock_irqsave(&table->rwlock, flags);
598 	local_index = find_gid(table, gid, &val, false, mask, NULL);
599 	if (local_index >= 0) {
600 		if (index)
601 			*index = local_index;
602 		read_unlock_irqrestore(&table->rwlock, flags);
603 		return 0;
604 	}
605 
606 	read_unlock_irqrestore(&table->rwlock, flags);
607 	return -ENOENT;
608 }
609 EXPORT_SYMBOL(ib_find_cached_gid_by_port);
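/* A rough usage sketch for the port-scoped lookup above; the surrounding
 * variables are hypothetical and error handling is elided:
 *
 *	u16 index;
 *
 *	if (!ib_find_cached_gid_by_port(device, &gid, IB_GID_TYPE_IB,
 *					port_num, NULL, &index))
 *		;	// gid was found at @index in that port's GID table
 */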
610 
611 /**
612  * ib_cache_gid_find_by_filter - Returns the GID table index where a specified
613  * GID value occurs
614  * @ib_dev: The device to query.
615  * @gid: The GID value to search for.
616  * @port: The port number of the device where the GID value should be
617  *   searched.
618  * @filter: The filter function is executed on any matching GID in the table.
619  *   If the filter function returns true, the corresponding index is returned,
620  *   otherwise, we continue searching the GID table. It's guaranteed that
621  *   while filter is executed, ndev field is valid and the structure won't
622  *   change. filter is executed in an atomic context. filter must not be NULL.
623  * @index: The index into the cached GID table where the GID was found. This
624  *   parameter may be NULL.
625  *
626  * ib_cache_gid_find_by_filter() searches for the specified GID value
627  * of which the filter function returns true in the port's GID table.
628  * This function is only supported on RoCE ports.
629  *
630  */
631 static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
632 				       const union ib_gid *gid,
633 				       u8 port,
634 				       bool (*filter)(const union ib_gid *,
635 						      const struct ib_gid_attr *,
636 						      void *),
637 				       void *context,
638 				       u16 *index)
639 {
640 	struct ib_gid_table *table;
641 	unsigned int i;
642 	unsigned long flags;
643 	bool found = false;
644 
645 
646 	if (!rdma_is_port_valid(ib_dev, port) ||
647 	    !rdma_protocol_roce(ib_dev, port))
648 		return -EPROTONOSUPPORT;
649 
650 	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
651 
652 	read_lock_irqsave(&table->rwlock, flags);
653 	for (i = 0; i < table->sz; i++) {
654 		struct ib_gid_attr attr;
655 
656 		if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
657 			continue;
658 
659 		if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
660 			continue;
661 
662 		memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
663 
664 		if (filter(gid, &attr, context)) {
665 			found = true;
666 			if (index)
667 				*index = i;
668 			break;
669 		}
670 	}
671 	read_unlock_irqrestore(&table->rwlock, flags);
672 
673 	if (!found)
674 		return -ENOENT;
675 	return 0;
676 }
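/* A filter is any predicate of the shape below; for instance, a caller could
 * match only entries of a given GID type (sketch only, names hypothetical).
 * The filter runs under the table's read lock, so it must not sleep:
 *
 *	static bool match_gid_type(const union ib_gid *gid,
 *				   const struct ib_gid_attr *attr,
 *				   void *context)
 *	{
 *		return attr->gid_type == *(enum ib_gid_type *)context;
 *	}
 */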
677 
678 static struct ib_gid_table *alloc_gid_table(int sz)
679 {
680 	struct ib_gid_table *table =
681 		kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
682 	int i;
683 
684 	if (!table)
685 		return NULL;
686 
687 	table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
688 	if (!table->data_vec)
689 		goto err_free_table;
690 
691 	mutex_init(&table->lock);
692 
693 	table->sz = sz;
694 	rwlock_init(&table->rwlock);
695 
696 	/* Mark all entries as invalid so that the allocator can hand out
697 	 * one of the invalid (free) entries.
698 	 */
699 	for (i = 0; i < sz; i++)
700 		table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID;
701 	return table;
702 
703 err_free_table:
704 	kfree(table);
705 	return NULL;
706 }
707 
708 static void release_gid_table(struct ib_gid_table *table)
709 {
710 	if (table) {
711 		kfree(table->data_vec);
712 		kfree(table);
713 	}
714 }
715 
716 static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
717 				   struct ib_gid_table *table)
718 {
719 	int i;
720 	bool deleted = false;
721 
722 	if (!table)
723 		return;
724 
725 	mutex_lock(&table->lock);
726 	for (i = 0; i < table->sz; ++i) {
727 		if (memcmp(&table->data_vec[i].gid, &zgid,
728 			   sizeof(table->data_vec[i].gid))) {
729 			del_gid(ib_dev, port, table, i);
730 			deleted = true;
731 		}
732 	}
733 	mutex_unlock(&table->lock);
734 
735 	if (deleted)
736 		dispatch_gid_change_event(ib_dev, port);
737 }
738 
739 void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
740 				  struct net_device *ndev,
741 				  unsigned long gid_type_mask,
742 				  enum ib_cache_gid_default_mode mode)
743 {
744 	union ib_gid gid = { };
745 	struct ib_gid_attr gid_attr;
746 	struct ib_gid_table *table;
747 	unsigned int gid_type;
748 	unsigned long mask;
749 
750 	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
751 
752 	mask = GID_ATTR_FIND_MASK_GID_TYPE |
753 	       GID_ATTR_FIND_MASK_DEFAULT |
754 	       GID_ATTR_FIND_MASK_NETDEV;
755 	memset(&gid_attr, 0, sizeof(gid_attr));
756 	gid_attr.ndev = ndev;
757 
758 	for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
759 		if (1UL << gid_type & ~gid_type_mask)
760 			continue;
761 
762 		gid_attr.gid_type = gid_type;
763 
764 		if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
765 			make_default_gid(ndev, &gid);
766 			__ib_cache_gid_add(ib_dev, port, &gid,
767 					   &gid_attr, mask, true);
768 		} else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
769 			_ib_cache_gid_del(ib_dev, port, &gid,
770 					  &gid_attr, mask, true);
771 		}
772 	}
773 }
774 
775 static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
776 				     struct ib_gid_table *table)
777 {
778 	unsigned int i;
779 	unsigned long roce_gid_type_mask;
780 	unsigned int num_default_gids;
781 	unsigned int current_gid = 0;
782 
783 	roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
784 	num_default_gids = hweight_long(roce_gid_type_mask);
785 	for (i = 0; i < num_default_gids && i < table->sz; i++) {
786 		struct ib_gid_table_entry *entry =
787 			&table->data_vec[i];
788 
789 		entry->props |= GID_TABLE_ENTRY_DEFAULT;
790 		current_gid = find_next_bit(&roce_gid_type_mask,
791 					    BITS_PER_LONG,
792 					    current_gid);
793 		entry->attr.gid_type = current_gid++;
794 	}
795 
796 	return 0;
797 }
798 
799 static int _gid_table_setup_one(struct ib_device *ib_dev)
800 {
801 	u8 port;
802 	struct ib_gid_table *table;
803 	int err = 0;
804 
805 	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
806 		u8 rdma_port = port + rdma_start_port(ib_dev);
807 
808 		table =
809 			alloc_gid_table(
810 				ib_dev->port_immutable[rdma_port].gid_tbl_len);
811 		if (!table) {
812 			err = -ENOMEM;
813 			goto rollback_table_setup;
814 		}
815 
816 		err = gid_table_reserve_default(ib_dev,
817 						port + rdma_start_port(ib_dev),
818 						table);
819 		if (err)
820 			goto rollback_table_setup;
821 		ib_dev->cache.ports[port].gid = table;
822 	}
823 
824 	return 0;
825 
826 rollback_table_setup:
827 	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
828 		table = ib_dev->cache.ports[port].gid;
829 
830 		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
831 				       table);
832 		release_gid_table(table);
833 	}
834 
835 	return err;
836 }
837 
838 static void gid_table_release_one(struct ib_device *ib_dev)
839 {
840 	struct ib_gid_table *table;
841 	u8 port;
842 
843 	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
844 		table = ib_dev->cache.ports[port].gid;
845 		release_gid_table(table);
846 		ib_dev->cache.ports[port].gid = NULL;
847 	}
848 }
849 
850 static void gid_table_cleanup_one(struct ib_device *ib_dev)
851 {
852 	struct ib_gid_table *table;
853 	u8 port;
854 
855 	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
856 		table = ib_dev->cache.ports[port].gid;
857 		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
858 				       table);
859 	}
860 }
861 
862 static int gid_table_setup_one(struct ib_device *ib_dev)
863 {
864 	int err;
865 
866 	err = _gid_table_setup_one(ib_dev);
867 
868 	if (err)
869 		return err;
870 
871 	rdma_roce_rescan_device(ib_dev);
872 
873 	return err;
874 }
875 
876 int ib_get_cached_gid(struct ib_device *device,
877 		      u8                port_num,
878 		      int               index,
879 		      union ib_gid     *gid,
880 		      struct ib_gid_attr *gid_attr)
881 {
882 	int res;
883 	unsigned long flags;
884 	struct ib_gid_table *table;
885 
886 	if (!rdma_is_port_valid(device, port_num))
887 		return -EINVAL;
888 
889 	table = device->cache.ports[port_num - rdma_start_port(device)].gid;
890 	read_lock_irqsave(&table->rwlock, flags);
891 	res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
892 	read_unlock_irqrestore(&table->rwlock, flags);
893 
894 	return res;
895 }
896 EXPORT_SYMBOL(ib_get_cached_gid);
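/* Usage sketch: when a gid_attr is passed, __ib_cache_gid_get() takes a
 * reference on attr->ndev, so the caller must drop it (illustrative only):
 *
 *	union ib_gid gid;
 *	struct ib_gid_attr attr;
 *
 *	if (!ib_get_cached_gid(device, port_num, 0, &gid, &attr)) {
 *		// use gid and attr
 *		if (attr.ndev)
 *			dev_put(attr.ndev);
 *	}
 */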
897 
898 /**
899  * ib_find_cached_gid - Returns the port number and GID table index where
900  *   a specified GID value occurs.
901  * @device: The device to query.
902  * @gid: The GID value to search for.
903  * @gid_type: The GID type to search for.
904  * @ndev: In RoCE, the net device of the device. NULL means ignore.
905  * @port_num: The port number of the device where the GID value was found.
906  * @index: The index into the cached GID table where the GID was found.  This
907  *   parameter may be NULL.
908  *
909  * ib_find_cached_gid() searches for the specified GID value in
910  * the local software cache.
911  */
912 int ib_find_cached_gid(struct ib_device *device,
913 		       const union ib_gid *gid,
914 		       enum ib_gid_type gid_type,
915 		       struct net_device *ndev,
916 		       u8               *port_num,
917 		       u16              *index)
918 {
919 	return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
920 }
921 EXPORT_SYMBOL(ib_find_cached_gid);
922 
923 int ib_find_gid_by_filter(struct ib_device *device,
924 			  const union ib_gid *gid,
925 			  u8 port_num,
926 			  bool (*filter)(const union ib_gid *gid,
927 					 const struct ib_gid_attr *,
928 					 void *),
929 			  void *context, u16 *index)
930 {
931 	/* Only the RoCE GID table supports a filter function */
932 	if (!rdma_protocol_roce(device, port_num) && filter)
933 		return -EPROTONOSUPPORT;
934 
935 	return ib_cache_gid_find_by_filter(device, gid,
936 					   port_num, filter,
937 					   context, index);
938 }
939 
940 int ib_get_cached_pkey(struct ib_device *device,
941 		       u8                port_num,
942 		       int               index,
943 		       u16              *pkey)
944 {
945 	struct ib_pkey_cache *cache;
946 	unsigned long flags;
947 	int ret = 0;
948 
949 	if (!rdma_is_port_valid(device, port_num))
950 		return -EINVAL;
951 
952 	read_lock_irqsave(&device->cache.lock, flags);
953 
954 	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
955 
956 	if (index < 0 || index >= cache->table_len)
957 		ret = -EINVAL;
958 	else
959 		*pkey = cache->table[index];
960 
961 	read_unlock_irqrestore(&device->cache.lock, flags);
962 
963 	return ret;
964 }
965 EXPORT_SYMBOL(ib_get_cached_pkey);
966 
967 int ib_get_cached_subnet_prefix(struct ib_device *device,
968 				u8                port_num,
969 				u64              *sn_pfx)
970 {
971 	unsigned long flags;
972 	int p;
973 
974 	if (!rdma_is_port_valid(device, port_num))
975 		return -EINVAL;
976 
977 	p = port_num - rdma_start_port(device);
978 	read_lock_irqsave(&device->cache.lock, flags);
979 	*sn_pfx = device->cache.ports[p].subnet_prefix;
980 	read_unlock_irqrestore(&device->cache.lock, flags);
981 
982 	return 0;
983 }
984 EXPORT_SYMBOL(ib_get_cached_subnet_prefix);
985 
986 int ib_find_cached_pkey(struct ib_device *device,
987 			u8                port_num,
988 			u16               pkey,
989 			u16              *index)
990 {
991 	struct ib_pkey_cache *cache;
992 	unsigned long flags;
993 	int i;
994 	int ret = -ENOENT;
995 	int partial_ix = -1;
996 
997 	if (!rdma_is_port_valid(device, port_num))
998 		return -EINVAL;
999 
1000 	read_lock_irqsave(&device->cache.lock, flags);
1001 
1002 	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
1003 
1004 	*index = -1;
1005 
1006 	for (i = 0; i < cache->table_len; ++i)
1007 		if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
1008 			if (cache->table[i] & 0x8000) {
1009 				*index = i;
1010 				ret = 0;
1011 				break;
1012 			} else
1013 				partial_ix = i;
1014 		}
1015 
1016 	if (ret && partial_ix >= 0) {
1017 		*index = partial_ix;
1018 		ret = 0;
1019 	}
1020 
1021 	read_unlock_irqrestore(&device->cache.lock, flags);
1022 
1023 	return ret;
1024 }
1025 EXPORT_SYMBOL(ib_find_cached_pkey);
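/* Bit 15 of a P_Key marks full membership, so the search above prefers a
 * full-membership entry and falls back to a limited-membership one only when
 * no full match exists. A rough example with hypothetical values:
 *
 *	u16 index;
 *
 *	// matches a cached 0x8001 entry if present, otherwise 0x0001
 *	if (!ib_find_cached_pkey(device, port_num, 0x8001, &index))
 *		;	// index now refers to the preferred match
 */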
1026 
1027 int ib_find_exact_cached_pkey(struct ib_device *device,
1028 			      u8                port_num,
1029 			      u16               pkey,
1030 			      u16              *index)
1031 {
1032 	struct ib_pkey_cache *cache;
1033 	unsigned long flags;
1034 	int i;
1035 	int ret = -ENOENT;
1036 
1037 	if (!rdma_is_port_valid(device, port_num))
1038 		return -EINVAL;
1039 
1040 	read_lock_irqsave(&device->cache.lock, flags);
1041 
1042 	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
1043 
1044 	*index = -1;
1045 
1046 	for (i = 0; i < cache->table_len; ++i)
1047 		if (cache->table[i] == pkey) {
1048 			*index = i;
1049 			ret = 0;
1050 			break;
1051 		}
1052 
1053 	read_unlock_irqrestore(&device->cache.lock, flags);
1054 
1055 	return ret;
1056 }
1057 EXPORT_SYMBOL(ib_find_exact_cached_pkey);
1058 
1059 int ib_get_cached_lmc(struct ib_device *device,
1060 		      u8                port_num,
1061 		      u8                *lmc)
1062 {
1063 	unsigned long flags;
1064 	int ret = 0;
1065 
1066 	if (!rdma_is_port_valid(device, port_num))
1067 		return -EINVAL;
1068 
1069 	read_lock_irqsave(&device->cache.lock, flags);
1070 	*lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
1071 	read_unlock_irqrestore(&device->cache.lock, flags);
1072 
1073 	return ret;
1074 }
1075 EXPORT_SYMBOL(ib_get_cached_lmc);
1076 
1077 int ib_get_cached_port_state(struct ib_device   *device,
1078 			     u8                  port_num,
1079 			     enum ib_port_state *port_state)
1080 {
1081 	unsigned long flags;
1082 	int ret = 0;
1083 
1084 	if (!rdma_is_port_valid(device, port_num))
1085 		return -EINVAL;
1086 
1087 	read_lock_irqsave(&device->cache.lock, flags);
1088 	*port_state = device->cache.ports[port_num
1089 		- rdma_start_port(device)].port_state;
1090 	read_unlock_irqrestore(&device->cache.lock, flags);
1091 
1092 	return ret;
1093 }
1094 EXPORT_SYMBOL(ib_get_cached_port_state);
1095 
1096 static int config_non_roce_gid_cache(struct ib_device *device,
1097 				     u8 port, int gid_tbl_len)
1098 {
1099 	struct ib_gid_attr gid_attr = {};
1100 	struct ib_gid_table *table;
1101 	union ib_gid gid;
1102 	int ret = 0;
1103 	int i;
1104 
1105 	gid_attr.device = device;
1106 	gid_attr.port_num = port;
1107 	table = device->cache.ports[port - rdma_start_port(device)].gid;
1108 
1109 	mutex_lock(&table->lock);
1110 	for (i = 0; i < gid_tbl_len; ++i) {
1111 		if (!device->query_gid)
1112 			continue;
1113 		ret = device->query_gid(device, port, i, &gid);
1114 		if (ret) {
1115 			pr_warn("query_gid failed (%d) for %s (index %d)\n",
1116 				ret, device->name, i);
1117 			goto err;
1118 		}
1119 		gid_attr.index = i;
1120 		add_modify_gid(table, &gid, &gid_attr);
1121 	}
1122 err:
1123 	mutex_unlock(&table->lock);
1124 	return ret;
1125 }
1126 
1127 static void ib_cache_update(struct ib_device *device,
1128 			    u8                port,
1129 			    bool	      enforce_security)
1130 {
1131 	struct ib_port_attr       *tprops = NULL;
1132 	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
1133 	int                        i;
1134 	int                        ret;
1135 	struct ib_gid_table	  *table;
1136 
1137 	if (!rdma_is_port_valid(device, port))
1138 		return;
1139 
1140 	table = device->cache.ports[port - rdma_start_port(device)].gid;
1141 
1142 	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
1143 	if (!tprops)
1144 		return;
1145 
1146 	ret = ib_query_port(device, port, tprops);
1147 	if (ret) {
1148 		pr_warn("ib_query_port failed (%d) for %s\n",
1149 			ret, device->name);
1150 		goto err;
1151 	}
1152 
1153 	if (!rdma_protocol_roce(device, port)) {
1154 		ret = config_non_roce_gid_cache(device, port,
1155 						tprops->gid_tbl_len);
1156 		if (ret)
1157 			goto err;
1158 	}
1159 
1160 	pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
1161 			     sizeof *pkey_cache->table, GFP_KERNEL);
1162 	if (!pkey_cache)
1163 		goto err;
1164 
1165 	pkey_cache->table_len = tprops->pkey_tbl_len;
1166 
1167 	for (i = 0; i < pkey_cache->table_len; ++i) {
1168 		ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
1169 		if (ret) {
1170 			pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
1171 				ret, device->name, i);
1172 			goto err;
1173 		}
1174 	}
1175 
1176 	write_lock_irq(&device->cache.lock);
1177 
1178 	old_pkey_cache = device->cache.ports[port -
1179 		rdma_start_port(device)].pkey;
1180 
1181 	device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
1182 	device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
1183 	device->cache.ports[port - rdma_start_port(device)].port_state =
1184 		tprops->state;
1185 
1186 	device->cache.ports[port - rdma_start_port(device)].subnet_prefix =
1187 							tprops->subnet_prefix;
1188 	write_unlock_irq(&device->cache.lock);
1189 
1190 	if (enforce_security)
1191 		ib_security_cache_change(device,
1192 					 port,
1193 					 tprops->subnet_prefix);
1194 
1195 	kfree(old_pkey_cache);
1196 	kfree(tprops);
1197 	return;
1198 
1199 err:
1200 	kfree(pkey_cache);
1201 	kfree(tprops);
1202 }
1203 
1204 static void ib_cache_task(struct work_struct *_work)
1205 {
1206 	struct ib_update_work *work =
1207 		container_of(_work, struct ib_update_work, work);
1208 
1209 	ib_cache_update(work->device,
1210 			work->port_num,
1211 			work->enforce_security);
1212 	kfree(work);
1213 }
1214 
1215 static void ib_cache_event(struct ib_event_handler *handler,
1216 			   struct ib_event *event)
1217 {
1218 	struct ib_update_work *work;
1219 
1220 	if (event->event == IB_EVENT_PORT_ERR    ||
1221 	    event->event == IB_EVENT_PORT_ACTIVE ||
1222 	    event->event == IB_EVENT_LID_CHANGE  ||
1223 	    event->event == IB_EVENT_PKEY_CHANGE ||
1224 	    event->event == IB_EVENT_SM_CHANGE   ||
1225 	    event->event == IB_EVENT_CLIENT_REREGISTER ||
1226 	    event->event == IB_EVENT_GID_CHANGE) {
1227 		work = kmalloc(sizeof *work, GFP_ATOMIC);
1228 		if (work) {
1229 			INIT_WORK(&work->work, ib_cache_task);
1230 			work->device   = event->device;
1231 			work->port_num = event->element.port_num;
1232 			if (event->event == IB_EVENT_PKEY_CHANGE ||
1233 			    event->event == IB_EVENT_GID_CHANGE)
1234 				work->enforce_security = true;
1235 			else
1236 				work->enforce_security = false;
1237 
1238 			queue_work(ib_wq, &work->work);
1239 		}
1240 	}
1241 }
1242 
1243 int ib_cache_setup_one(struct ib_device *device)
1244 {
1245 	int p;
1246 	int err;
1247 
1248 	rwlock_init(&device->cache.lock);
1249 
1250 	device->cache.ports =
1251 		kzalloc(sizeof(*device->cache.ports) *
1252 			(rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL);
1253 	if (!device->cache.ports)
1254 		return -ENOMEM;
1255 
1256 	err = gid_table_setup_one(device);
1257 	if (err) {
1258 		kfree(device->cache.ports);
1259 		device->cache.ports = NULL;
1260 		return err;
1261 	}
1262 
1263 	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
1264 		ib_cache_update(device, p + rdma_start_port(device), true);
1265 
1266 	INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
1267 			      device, ib_cache_event);
1268 	ib_register_event_handler(&device->cache.event_handler);
1269 	return 0;
1270 }
1271 
1272 void ib_cache_release_one(struct ib_device *device)
1273 {
1274 	int p;
1275 
1276 	/*
1277 	 * The release function frees all the cache elements.
1278 	 * This function should be called as part of freeing
1279 	 * all the device's resources when the cache could no
1280 	 * longer be accessed.
1281 	 */
1282 	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
1283 		kfree(device->cache.ports[p].pkey);
1284 
1285 	gid_table_release_one(device);
1286 	kfree(device->cache.ports);
1287 }
1288 
1289 void ib_cache_cleanup_one(struct ib_device *device)
1290 {
1291 	/* The cleanup function unregisters the event handler,
1292 	 * waits for all in-progress workqueue elements and cleans
1293 	 * up the GID cache. This function should be called after
1294 	 * the device was removed from the devices list and all
1295 	 * clients were removed, so the cache exists but is
1296 	 * non-functional and shouldn't be updated anymore.
1297 	 */
1298 	ib_unregister_event_handler(&device->cache.event_handler);
1299 	flush_workqueue(ib_wq);
1300 	gid_table_cleanup_one(device);
1301 }
1302 
1303 void __init ib_cache_setup(void)
1304 {
1305 	roce_gid_mgmt_init();
1306 }
1307 
1308 void __exit ib_cache_cleanup(void)
1309 {
1310 	roce_gid_mgmt_cleanup();
1311 }
1312