/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <net/addrconf.h>

#include <rdma/ib_cache.h>

#include "core_priv.h"

struct ib_pkey_cache {
	int             table_len;
	u16             table[0];
};

struct ib_update_work {
	struct work_struct work;
	struct ib_device  *device;
	u8                 port_num;
	bool               enforce_security;
};

union ib_gid zgid;
EXPORT_SYMBOL(zgid);

enum gid_attr_find_mask {
	GID_ATTR_FIND_MASK_GID		= 1UL << 0,
	GID_ATTR_FIND_MASK_NETDEV	= 1UL << 1,
	GID_ATTR_FIND_MASK_DEFAULT	= 1UL << 2,
	GID_ATTR_FIND_MASK_GID_TYPE	= 1UL << 3,
};

enum gid_table_entry_props {
	GID_TABLE_ENTRY_INVALID		= 1UL << 0,
	GID_TABLE_ENTRY_DEFAULT		= 1UL << 1,
};

struct ib_gid_table_entry {
	unsigned long	    props;
	union ib_gid	    gid;
	struct ib_gid_attr  attr;
	void		   *context;
};

struct ib_gid_table {
	int			sz;
	/* In RoCE, adding a GID to the table requires:
	 * (a) Find if this GID already exists.
	 * (b) Find a free space.
	 * (c) Write the new GID.
	 *
	 * Delete requires a different set of operations:
	 * (a) Find the GID.
	 * (b) Delete it.
	 */
	/* Any writer to data_vec must hold this lock and the write side of
	 * rwlock. Readers must hold only rwlock. All writers must be in a
	 * sleepable context.
	 */
	struct mutex		lock;
	/* rwlock protects data_vec[ix]->props. */
	rwlock_t		rwlock;
	struct ib_gid_table_entry *data_vec;
};
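/*
 * Raise an IB_EVENT_GID_CHANGE event for @port so that registered
 * event handlers can react to the updated GID table.
 */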
static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
{
	struct ib_event event;

	event.device		= ib_dev;
	event.element.port_num	= port;
	event.event		= IB_EVENT_GID_CHANGE;

	ib_dispatch_event(&event);
}

static const char * const gid_type_str[] = {
	[IB_GID_TYPE_IB]		= "IB/RoCE v1",
	[IB_GID_TYPE_ROCE_UDP_ENCAP]	= "RoCE v2",
};

const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
{
	if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
		return gid_type_str[gid_type];

	return "Invalid GID type";
}
EXPORT_SYMBOL(ib_cache_gid_type_str);

/** rdma_is_zero_gid - Check if given GID is zero or not.
 * @gid:	GID to check
 * Returns true if given GID is zero, returns false otherwise.
 */
bool rdma_is_zero_gid(const union ib_gid *gid)
{
	return !memcmp(gid, &zgid, sizeof(*gid));
}
EXPORT_SYMBOL(rdma_is_zero_gid);

int ib_cache_gid_parse_type_str(const char *buf)
{
	unsigned int i;
	size_t len;
	int err = -EINVAL;

	len = strlen(buf);
	if (len == 0)
		return -EINVAL;

	if (buf[len - 1] == '\n')
		len--;

	for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
		if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
		    len == strlen(gid_type_str[i])) {
			err = i;
			break;
		}

	return err;
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);

static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
{
	return device->cache.ports[port - rdma_start_port(device)].gid;
}

static void del_roce_gid(struct ib_device *device, u8 port_num,
			 struct ib_gid_table *table, int ix)
{
	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
		 device->name, port_num, ix,
		 table->data_vec[ix].gid.raw);

	if (rdma_cap_roce_gid_table(device, port_num))
		device->del_gid(&table->data_vec[ix].attr,
				&table->data_vec[ix].context);
	dev_put(table->data_vec[ix].attr.ndev);
}
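/*
 * add_roce_gid() programs a new RoCE GID into the hardware GID table
 * through the driver's ->add_gid() hook (when the device exposes one)
 * and takes a reference on the attribute's netdev for the lifetime of
 * the cache entry.
 */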
static int add_roce_gid(struct ib_gid_table *table,
			const union ib_gid *gid,
			const struct ib_gid_attr *attr)
{
	struct ib_gid_table_entry *entry;
	int ix = attr->index;
	int ret = 0;

	if (!attr->ndev) {
		pr_err("%s NULL netdev device=%s port=%d index=%d\n",
		       __func__, attr->device->name, attr->port_num,
		       attr->index);
		return -EINVAL;
	}

	entry = &table->data_vec[ix];
	if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) {
		WARN(1, "GID table corruption device=%s port=%d index=%d\n",
		     attr->device->name, attr->port_num,
		     attr->index);
		return -EINVAL;
	}

	if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
		ret = attr->device->add_gid(gid, attr, &entry->context);
		if (ret) {
			pr_err("%s GID add failed device=%s port=%d index=%d\n",
			       __func__, attr->device->name, attr->port_num,
			       attr->index);
			goto add_err;
		}
	}
	dev_hold(attr->ndev);

add_err:
	if (!ret)
		pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
			 attr->device->name, attr->port_num, ix, gid->raw);
	return ret;
}

/**
 * add_modify_gid - Add or modify GID table entry
 *
 * @table:	GID table in which GID to be added or modified
 * @gid:	GID content
 * @attr:	Attributes of the GID
 *
 * Returns 0 on success or an appropriate error code. It accepts zero
 * GID addition for non-RoCE ports from HCAs that report such entries
 * as valid GIDs. However, such zero GIDs are not added to the cache.
 */
static int add_modify_gid(struct ib_gid_table *table,
			  const union ib_gid *gid,
			  const struct ib_gid_attr *attr)
{
	int ret;

	if (rdma_protocol_roce(attr->device, attr->port_num)) {
		ret = add_roce_gid(table, gid, attr);
		if (ret)
			return ret;
	} else {
		/*
		 * Some HCAs report multiple GID entries with only one
		 * valid GID, leaving the remaining entries as zero GIDs.
		 * Ignore such behavior for the IB link layer and don't
		 * fail the call, but don't add such entries to the GID
		 * cache.
		 */
		if (rdma_is_zero_gid(gid))
			return 0;
	}

	lockdep_assert_held(&table->lock);
	memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
	memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));

	write_lock_irq(&table->rwlock);
	table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
	write_unlock_irq(&table->rwlock);
	return 0;
}

/**
 * del_gid - Delete GID table entry
 *
 * @ib_dev:	IB device whose GID entry to be deleted
 * @port:	Port number of the IB device
 * @table:	GID table of the IB device for a port
 * @ix:		GID entry index to delete
 *
 */
static void del_gid(struct ib_device *ib_dev, u8 port,
		    struct ib_gid_table *table, int ix)
{
	lockdep_assert_held(&table->lock);
	write_lock_irq(&table->rwlock);
	table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
	write_unlock_irq(&table->rwlock);

	if (rdma_protocol_roce(ib_dev, port))
		del_roce_gid(ib_dev, port, table, ix);
	memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid));
	memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
	table->data_vec[ix].context = NULL;
}
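/*
 * find_gid() scans the table for an entry that matches @gid/@val under
 * the criteria selected by @mask (GID value, GID type, netdev and/or
 * default-GID property) and returns the index of the first valid match,
 * or -1 if none is found. When @pempty is non-NULL it also reports the
 * first free (INVALID) slot whose default property matches @default_gid,
 * for use by the GID add path.
 */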
/* rwlock should be read locked, or lock should be held */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
		    const struct ib_gid_attr *val, bool default_gid,
		    unsigned long mask, int *pempty)
{
	int i = 0;
	int found = -1;
	int empty = pempty ? -1 : 0;

	while (i < table->sz && (found < 0 || empty < 0)) {
		struct ib_gid_table_entry *data = &table->data_vec[i];
		struct ib_gid_attr *attr = &data->attr;
		int curr_index = i;

		i++;

		/* find_gid() is used during GID addition where it is expected
		 * to return a free entry slot which is not a duplicate.
		 * A free entry slot is requested and returned only if pempty
		 * is set, so look up a free slot only when requested.
		 */
		if (pempty && empty < 0) {
			if (data->props & GID_TABLE_ENTRY_INVALID &&
			    (default_gid ==
			     !!(data->props & GID_TABLE_ENTRY_DEFAULT))) {
				/*
				 * Found an invalid (free) entry; allocate it.
				 * If a default GID is requested, then our
				 * found slot must be one of the DEFAULT
				 * reserved slots or we fail.
				 * This ensures that only DEFAULT reserved
				 * slots are used for default property GIDs.
				 */
				empty = curr_index;
			}
		}

		/*
		 * find_gid() is also used to find a valid entry during
		 * lookup, where validity needs to be checked. The free-slot
		 * search above has already considered INVALID entries, so
		 * skip them here and only match against valid entries.
		 */
		if (data->props & GID_TABLE_ENTRY_INVALID)
			continue;

		if (found >= 0)
			continue;

		if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
		    attr->gid_type != val->gid_type)
			continue;

		if (mask & GID_ATTR_FIND_MASK_GID &&
		    memcmp(gid, &data->gid, sizeof(*gid)))
			continue;

		if (mask & GID_ATTR_FIND_MASK_NETDEV &&
		    attr->ndev != val->ndev)
			continue;

		if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
		    !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
		    default_gid)
			continue;

		found = curr_index;
	}

	if (pempty)
		*pempty = empty;

	return found;
}

static void make_default_gid(struct net_device *dev, union ib_gid *gid)
{
	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
	addrconf_ifid_eui48(&gid->raw[8], dev);
}

static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
			      union ib_gid *gid, struct ib_gid_attr *attr,
			      unsigned long mask, bool default_gid)
{
	struct ib_gid_table *table;
	int ret = 0;
	int empty;
	int ix;

	/* Do not allow adding zero GID in support of
	 * IB spec version 1.3 section 4.1.1 point (6) and
	 * section 12.7.10 and section 12.7.20
	 */
	if (rdma_is_zero_gid(gid))
		return -EINVAL;

	table = rdma_gid_table(ib_dev, port);

	mutex_lock(&table->lock);

	ix = find_gid(table, gid, attr, default_gid, mask, &empty);
	if (ix >= 0)
		goto out_unlock;

	if (empty < 0) {
		ret = -ENOSPC;
		goto out_unlock;
	}
	attr->device = ib_dev;
	attr->index = empty;
	attr->port_num = port;
	ret = add_modify_gid(table, gid, attr);
	if (!ret)
		dispatch_gid_change_event(ib_dev, port);

out_unlock:
	mutex_unlock(&table->lock);
	if (ret)
		pr_warn("%s: unable to add gid %pI6 error=%d\n",
			__func__, gid->raw, ret);
	return ret;
}

int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	struct net_device *idev;
	unsigned long mask;
	int ret;

	if (ib_dev->get_netdev) {
		idev = ib_dev->get_netdev(ib_dev, port);
		if (idev && attr->ndev != idev) {
			union ib_gid default_gid;

			/* Adding default GIDs is not permitted */
			make_default_gid(idev, &default_gid);
			if (!memcmp(gid, &default_gid, sizeof(*gid))) {
				dev_put(idev);
				return -EPERM;
			}
		}
		if (idev)
			dev_put(idev);
	}

	mask = GID_ATTR_FIND_MASK_GID |
	       GID_ATTR_FIND_MASK_GID_TYPE |
	       GID_ATTR_FIND_MASK_NETDEV;

	ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
	return ret;
}

static int
_ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
		  union ib_gid *gid, struct ib_gid_attr *attr,
		  unsigned long mask, bool default_gid)
{
	struct ib_gid_table *table;
	int ret = 0;
	int ix;

	table = rdma_gid_table(ib_dev, port);

	mutex_lock(&table->lock);

	ix = find_gid(table, gid, attr, default_gid, mask, NULL);
	if (ix < 0) {
		ret = -EINVAL;
		goto out_unlock;
	}

	del_gid(ib_dev, port, table, ix);
	dispatch_gid_change_event(ib_dev, port);

out_unlock:
	mutex_unlock(&table->lock);
	if (ret)
		pr_debug("%s: can't delete gid %pI6 error=%d\n",
			 __func__, gid->raw, ret);
	return ret;
}
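/*
 * ib_cache_gid_del() removes the non-default entry that matches @gid,
 * its GID type and netdev from the GID table of @port. Returns 0 on
 * success, or -EINVAL when no matching entry is found.
 */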
int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	unsigned long mask = GID_ATTR_FIND_MASK_GID	 |
			     GID_ATTR_FIND_MASK_GID_TYPE |
			     GID_ATTR_FIND_MASK_DEFAULT  |
			     GID_ATTR_FIND_MASK_NETDEV;

	return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false);
}

int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
				     struct net_device *ndev)
{
	struct ib_gid_table *table;
	int ix;
	bool deleted = false;

	table = rdma_gid_table(ib_dev, port);

	mutex_lock(&table->lock);

	for (ix = 0; ix < table->sz; ix++) {
		if (table->data_vec[ix].attr.ndev == ndev) {
			del_gid(ib_dev, port, table, ix);
			deleted = true;
		}
	}

	mutex_unlock(&table->lock);

	if (deleted)
		dispatch_gid_change_event(ib_dev, port);

	return 0;
}

static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
			      union ib_gid *gid, struct ib_gid_attr *attr)
{
	struct ib_gid_table *table;

	table = rdma_gid_table(ib_dev, port);

	if (index < 0 || index >= table->sz)
		return -EINVAL;

	if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
		return -EINVAL;

	memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
	if (attr) {
		memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
		if (attr->ndev)
			dev_hold(attr->ndev);
	}

	return 0;
}

static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
				    const union ib_gid *gid,
				    const struct ib_gid_attr *val,
				    unsigned long mask,
				    u8 *port, u16 *index)
{
	struct ib_gid_table *table;
	u8 p;
	int local_index;
	unsigned long flags;

	for (p = 0; p < ib_dev->phys_port_cnt; p++) {
		table = ib_dev->cache.ports[p].gid;
		read_lock_irqsave(&table->rwlock, flags);
		local_index = find_gid(table, gid, val, false, mask, NULL);
		if (local_index >= 0) {
			if (index)
				*index = local_index;
			if (port)
				*port = p + rdma_start_port(ib_dev);
			read_unlock_irqrestore(&table->rwlock, flags);
			return 0;
		}
		read_unlock_irqrestore(&table->rwlock, flags);
	}

	return -ENOENT;
}
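/*
 * ib_cache_gid_find() looks up @gid of @gid_type (optionally restricted
 * to @ndev) across all ports of @ib_dev and returns the port number and
 * GID table index of the first match.
 */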
static int ib_cache_gid_find(struct ib_device *ib_dev,
			     const union ib_gid *gid,
			     enum ib_gid_type gid_type,
			     struct net_device *ndev, u8 *port,
			     u16 *index)
{
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE;
	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};

	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;

	return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
					mask, port, index);
}

/**
 * ib_find_cached_gid_by_port - Returns the GID table index where a specified
 * GID value occurs. It searches for the specified GID value in the local
 * software cache.
 * @device: The device to query.
 * @gid: The GID value to search for.
 * @gid_type: The GID type to search for.
 * @port_num: The port number of the device where the GID value should be
 *   searched.
 * @ndev: In RoCE, the net device of the device. NULL means ignore.
 * @index: The index into the cached GID table where the GID was found. This
 *   parameter may be NULL.
 */
int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
			       const union ib_gid *gid,
			       enum ib_gid_type gid_type,
			       u8 port, struct net_device *ndev,
			       u16 *index)
{
	int local_index;
	struct ib_gid_table *table;
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE;
	struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
	unsigned long flags;

	if (!rdma_is_port_valid(ib_dev, port))
		return -ENOENT;

	table = rdma_gid_table(ib_dev, port);

	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;

	read_lock_irqsave(&table->rwlock, flags);
	local_index = find_gid(table, gid, &val, false, mask, NULL);
	if (local_index >= 0) {
		if (index)
			*index = local_index;
		read_unlock_irqrestore(&table->rwlock, flags);
		return 0;
	}

	read_unlock_irqrestore(&table->rwlock, flags);
	return -ENOENT;
}
EXPORT_SYMBOL(ib_find_cached_gid_by_port);
/**
 * ib_cache_gid_find_by_filter - Returns the GID table index where a specified
 * GID value occurs
 * @device: The device to query.
 * @gid: The GID value to search for.
 * @port_num: The port number of the device where the GID value could be
 *   searched.
 * @filter: The filter function is executed on any matching GID in the table.
 *   If the filter function returns true, the corresponding index is returned,
 *   otherwise, we continue searching the GID table. It's guaranteed that
 *   while filter is executed, ndev field is valid and the structure won't
 *   change. filter is executed in an atomic context. filter must not be NULL.
 * @index: The index into the cached GID table where the GID was found. This
 *   parameter may be NULL.
 *
 * ib_cache_gid_find_by_filter() searches for the specified GID value
 * for which the filter function returns true in the port's GID table.
 * This function is only supported on RoCE ports.
 *
 */
static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
				       const union ib_gid *gid,
				       u8 port,
				       bool (*filter)(const union ib_gid *,
						      const struct ib_gid_attr *,
						      void *),
				       void *context,
				       u16 *index)
{
	struct ib_gid_table *table;
	unsigned int i;
	unsigned long flags;
	bool found = false;

	if (!rdma_is_port_valid(ib_dev, port) ||
	    !rdma_protocol_roce(ib_dev, port))
		return -EPROTONOSUPPORT;

	table = rdma_gid_table(ib_dev, port);

	read_lock_irqsave(&table->rwlock, flags);
	for (i = 0; i < table->sz; i++) {
		struct ib_gid_attr attr;

		if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
			continue;

		if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
			continue;

		memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));

		if (filter(gid, &attr, context)) {
			found = true;
			if (index)
				*index = i;
			break;
		}
	}
	read_unlock_irqrestore(&table->rwlock, flags);

	if (!found)
		return -ENOENT;
	return 0;
}

static struct ib_gid_table *alloc_gid_table(int sz)
{
	struct ib_gid_table *table =
		kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
	int i;

	if (!table)
		return NULL;

	table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
	if (!table->data_vec)
		goto err_free_table;

	mutex_init(&table->lock);

	table->sz = sz;
	rwlock_init(&table->rwlock);

	/* Mark all entries as invalid so that the allocator can hand out
	 * one of the invalid (free) entries.
	 */
	for (i = 0; i < sz; i++)
		table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID;
	return table;

err_free_table:
	kfree(table);
	return NULL;
}

static void release_gid_table(struct ib_gid_table *table)
{
	if (table) {
		kfree(table->data_vec);
		kfree(table);
	}
}

static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
				   struct ib_gid_table *table)
{
	int i;
	bool deleted = false;

	if (!table)
		return;

	mutex_lock(&table->lock);
	for (i = 0; i < table->sz; ++i) {
		if (!rdma_is_zero_gid(&table->data_vec[i].gid)) {
			del_gid(ib_dev, port, table, i);
			deleted = true;
		}
	}
	mutex_unlock(&table->lock);

	if (deleted)
		dispatch_gid_change_event(ib_dev, port);
}

void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
				  struct net_device *ndev,
				  unsigned long gid_type_mask,
				  enum ib_cache_gid_default_mode mode)
{
	union ib_gid gid = { };
	struct ib_gid_attr gid_attr;
	struct ib_gid_table *table;
	unsigned int gid_type;
	unsigned long mask;

	table = rdma_gid_table(ib_dev, port);

	mask = GID_ATTR_FIND_MASK_GID_TYPE |
	       GID_ATTR_FIND_MASK_DEFAULT |
	       GID_ATTR_FIND_MASK_NETDEV;
	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
		if (1UL << gid_type & ~gid_type_mask)
			continue;

		gid_attr.gid_type = gid_type;

		if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
			make_default_gid(ndev, &gid);
			__ib_cache_gid_add(ib_dev, port, &gid,
					   &gid_attr, mask, true);
		} else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
			_ib_cache_gid_del(ib_dev, port, &gid,
					  &gid_attr, mask, true);
		}
	}
}
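/*
 * gid_table_reserve_default() reserves the first table slots for default
 * GIDs, one per RoCE GID type supported on @port, and records in each
 * reserved entry the GID type that slot will hold.
 */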
static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
				      struct ib_gid_table *table)
{
	unsigned int i;
	unsigned long roce_gid_type_mask;
	unsigned int num_default_gids;
	unsigned int current_gid = 0;

	roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	num_default_gids = hweight_long(roce_gid_type_mask);
	for (i = 0; i < num_default_gids && i < table->sz; i++) {
		struct ib_gid_table_entry *entry = &table->data_vec[i];

		entry->props |= GID_TABLE_ENTRY_DEFAULT;
		current_gid = find_next_bit(&roce_gid_type_mask,
					    BITS_PER_LONG,
					    current_gid);
		entry->attr.gid_type = current_gid++;
	}
}

static void gid_table_release_one(struct ib_device *ib_dev)
{
	struct ib_gid_table *table;
	u8 port;

	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		table = ib_dev->cache.ports[port].gid;
		release_gid_table(table);
		ib_dev->cache.ports[port].gid = NULL;
	}
}

static int _gid_table_setup_one(struct ib_device *ib_dev)
{
	u8 port;
	struct ib_gid_table *table;

	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		u8 rdma_port = port + rdma_start_port(ib_dev);

		table = alloc_gid_table(
				ib_dev->port_immutable[rdma_port].gid_tbl_len);
		if (!table)
			goto rollback_table_setup;

		gid_table_reserve_default(ib_dev, rdma_port, table);
		ib_dev->cache.ports[port].gid = table;
	}
	return 0;

rollback_table_setup:
	gid_table_release_one(ib_dev);
	return -ENOMEM;
}

static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
	struct ib_gid_table *table;
	u8 port;

	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		table = ib_dev->cache.ports[port].gid;
		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
				       table);
	}
}

static int gid_table_setup_one(struct ib_device *ib_dev)
{
	int err;

	err = _gid_table_setup_one(ib_dev);

	if (err)
		return err;

	rdma_roce_rescan_device(ib_dev);

	return err;
}
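/*
 * ib_get_cached_gid() reads the GID at @index of @port_num from the
 * software cache under the table's read lock. When @gid_attr is
 * provided and the cached attribute references a netdev, a reference is
 * taken on that netdev on behalf of the caller.
 */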
int ib_get_cached_gid(struct ib_device *device,
		      u8 port_num,
		      int index,
		      union ib_gid *gid,
		      struct ib_gid_attr *gid_attr)
{
	int res;
	unsigned long flags;
	struct ib_gid_table *table;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	table = rdma_gid_table(device, port_num);
	read_lock_irqsave(&table->rwlock, flags);
	res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
	read_unlock_irqrestore(&table->rwlock, flags);

	return res;
}
EXPORT_SYMBOL(ib_get_cached_gid);

/**
 * ib_find_cached_gid - Returns the port number and GID table index where
 *   a specified GID value occurs.
 * @device: The device to query.
 * @gid: The GID value to search for.
 * @gid_type: The GID type to search for.
 * @ndev: In RoCE, the net device of the device. NULL means ignore.
 * @port_num: The port number of the device where the GID value was found.
 * @index: The index into the cached GID table where the GID was found. This
 *   parameter may be NULL.
 *
 * ib_find_cached_gid() searches for the specified GID value in
 * the local software cache.
 */
int ib_find_cached_gid(struct ib_device *device,
		       const union ib_gid *gid,
		       enum ib_gid_type gid_type,
		       struct net_device *ndev,
		       u8 *port_num,
		       u16 *index)
{
	return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
}
EXPORT_SYMBOL(ib_find_cached_gid);

int ib_find_gid_by_filter(struct ib_device *device,
			  const union ib_gid *gid,
			  u8 port_num,
			  bool (*filter)(const union ib_gid *gid,
					 const struct ib_gid_attr *,
					 void *),
			  void *context, u16 *index)
{
	/* Only the RoCE GID table supports a filter function */
	if (!rdma_protocol_roce(device, port_num) && filter)
		return -EPROTONOSUPPORT;

	return ib_cache_gid_find_by_filter(device, gid,
					   port_num, filter,
					   context, index);
}

int ib_get_cached_pkey(struct ib_device *device,
		       u8 port_num,
		       int index,
		       u16 *pkey)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int ret = 0;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

	if (index < 0 || index >= cache->table_len)
		ret = -EINVAL;
	else
		*pkey = cache->table[index];

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_pkey);

int ib_get_cached_subnet_prefix(struct ib_device *device,
				u8 port_num,
				u64 *sn_pfx)
{
	unsigned long flags;
	int p;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	p = port_num - rdma_start_port(device);
	read_lock_irqsave(&device->cache.lock, flags);
	*sn_pfx = device->cache.ports[p].subnet_prefix;
	read_unlock_irqrestore(&device->cache.lock, flags);

	return 0;
}
EXPORT_SYMBOL(ib_get_cached_subnet_prefix);

int ib_find_cached_pkey(struct ib_device *device,
			u8 port_num,
			u16 pkey,
			u16 *index)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int i;
	int ret = -ENOENT;
	int partial_ix = -1;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

	*index = -1;

	for (i = 0; i < cache->table_len; ++i)
		if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
			if (cache->table[i] & 0x8000) {
				*index = i;
				ret = 0;
				break;
			} else
				partial_ix = i;
		}

	if (ret && partial_ix >= 0) {
		*index = partial_ix;
		ret = 0;
	}

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);

int ib_find_exact_cached_pkey(struct ib_device *device,
			      u8 port_num,
			      u16 pkey,
			      u16 *index)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int i;
	int ret = -ENOENT;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

	*index = -1;

	for (i = 0; i < cache->table_len; ++i)
		if (cache->table[i] == pkey) {
			*index = i;
			ret = 0;
			break;
		}

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_find_exact_cached_pkey);
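/*
 * ib_get_cached_lmc() returns the cached LID mask control (LMC) value
 * for @port_num.
 */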
int ib_get_cached_lmc(struct ib_device *device,
		      u8 port_num,
		      u8 *lmc)
{
	unsigned long flags;
	int ret = 0;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);
	*lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);

int ib_get_cached_port_state(struct ib_device *device,
			     u8 port_num,
			     enum ib_port_state *port_state)
{
	unsigned long flags;
	int ret = 0;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);
	*port_state = device->cache.ports[port_num
		- rdma_start_port(device)].port_state;
	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_port_state);

static int config_non_roce_gid_cache(struct ib_device *device,
				     u8 port, int gid_tbl_len)
{
	struct ib_gid_attr gid_attr = {};
	struct ib_gid_table *table;
	union ib_gid gid;
	int ret = 0;
	int i;

	gid_attr.device = device;
	gid_attr.port_num = port;
	table = rdma_gid_table(device, port);

	mutex_lock(&table->lock);
	for (i = 0; i < gid_tbl_len; ++i) {
		if (!device->query_gid)
			continue;
		ret = device->query_gid(device, port, i, &gid);
		if (ret) {
			pr_warn("query_gid failed (%d) for %s (index %d)\n",
				ret, device->name, i);
			goto err;
		}
		gid_attr.index = i;
		add_modify_gid(table, &gid, &gid_attr);
	}
err:
	mutex_unlock(&table->lock);
	return ret;
}
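/*
 * ib_cache_update() refreshes the cached state of @port: for non-RoCE
 * ports it re-reads the GID table from the device, then it rebuilds the
 * P_Key cache and updates the cached LMC, port state and subnet prefix.
 * When @enforce_security is set, ib_security_cache_change() is notified
 * of the new subnet prefix.
 */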
static void ib_cache_update(struct ib_device *device,
			    u8 port,
			    bool enforce_security)
{
	struct ib_port_attr *tprops = NULL;
	struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
	int i;
	int ret;
	struct ib_gid_table *table;

	if (!rdma_is_port_valid(device, port))
		return;

	table = rdma_gid_table(device, port);

	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
	if (!tprops)
		return;

	ret = ib_query_port(device, port, tprops);
	if (ret) {
		pr_warn("ib_query_port failed (%d) for %s\n",
			ret, device->name);
		goto err;
	}

	if (!rdma_protocol_roce(device, port)) {
		ret = config_non_roce_gid_cache(device, port,
						tprops->gid_tbl_len);
		if (ret)
			goto err;
	}

	pkey_cache = kmalloc(struct_size(pkey_cache, table,
					 tprops->pkey_tbl_len),
			     GFP_KERNEL);
	if (!pkey_cache)
		goto err;

	pkey_cache->table_len = tprops->pkey_tbl_len;

	for (i = 0; i < pkey_cache->table_len; ++i) {
		ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
		if (ret) {
			pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
				ret, device->name, i);
			goto err;
		}
	}

	write_lock_irq(&device->cache.lock);

	old_pkey_cache = device->cache.ports[port -
					     rdma_start_port(device)].pkey;

	device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
	device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
	device->cache.ports[port - rdma_start_port(device)].port_state =
		tprops->state;

	device->cache.ports[port - rdma_start_port(device)].subnet_prefix =
		tprops->subnet_prefix;
	write_unlock_irq(&device->cache.lock);

	if (enforce_security)
		ib_security_cache_change(device,
					 port,
					 tprops->subnet_prefix);

	kfree(old_pkey_cache);
	kfree(tprops);
	return;

err:
	kfree(pkey_cache);
	kfree(tprops);
}

static void ib_cache_task(struct work_struct *_work)
{
	struct ib_update_work *work =
		container_of(_work, struct ib_update_work, work);

	ib_cache_update(work->device,
			work->port_num,
			work->enforce_security);
	kfree(work);
}

static void ib_cache_event(struct ib_event_handler *handler,
			   struct ib_event *event)
{
	struct ib_update_work *work;

	if (event->event == IB_EVENT_PORT_ERR    ||
	    event->event == IB_EVENT_PORT_ACTIVE ||
	    event->event == IB_EVENT_LID_CHANGE  ||
	    event->event == IB_EVENT_PKEY_CHANGE ||
	    event->event == IB_EVENT_SM_CHANGE   ||
	    event->event == IB_EVENT_CLIENT_REREGISTER ||
	    event->event == IB_EVENT_GID_CHANGE) {
		work = kmalloc(sizeof *work, GFP_ATOMIC);
		if (work) {
			INIT_WORK(&work->work, ib_cache_task);
			work->device   = event->device;
			work->port_num = event->element.port_num;
			if (event->event == IB_EVENT_PKEY_CHANGE ||
			    event->event == IB_EVENT_GID_CHANGE)
				work->enforce_security = true;
			else
				work->enforce_security = false;

			queue_work(ib_wq, &work->work);
		}
	}
}

int ib_cache_setup_one(struct ib_device *device)
{
	int p;
	int err;

	rwlock_init(&device->cache.lock);

	device->cache.ports =
		kcalloc(rdma_end_port(device) - rdma_start_port(device) + 1,
			sizeof(*device->cache.ports),
			GFP_KERNEL);
	if (!device->cache.ports)
		return -ENOMEM;

	err = gid_table_setup_one(device);
	if (err) {
		kfree(device->cache.ports);
		device->cache.ports = NULL;
		return err;
	}

	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
		ib_cache_update(device, p + rdma_start_port(device), true);

	INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
			      device, ib_cache_event);
	ib_register_event_handler(&device->cache.event_handler);
	return 0;
}

void ib_cache_release_one(struct ib_device *device)
{
	int p;

	/*
	 * The release function frees all the cache elements.
	 * This function should be called as part of freeing
	 * all the device's resources when the cache could no
	 * longer be accessed.
	 */
	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
		kfree(device->cache.ports[p].pkey);

	gid_table_release_one(device);
	kfree(device->cache.ports);
}

void ib_cache_cleanup_one(struct ib_device *device)
{
	/* The cleanup function unregisters the event handler,
	 * waits for all in-progress workqueue elements and cleans
	 * up the GID cache. This function should be called after
	 * the device was removed from the devices list and all
	 * clients were removed, so the cache exists but is
	 * non-functional and shouldn't be updated anymore.
	 */
	ib_unregister_event_handler(&device->cache.event_handler);
	flush_workqueue(ib_wq);
	gid_table_cleanup_one(device);
}