/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc.  All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <net/addrconf.h>

#include <rdma/ib_cache.h>

#include "core_priv.h"

struct ib_pkey_cache {
	int table_len;
	u16 table[0];
};

struct ib_update_work {
	struct work_struct work;
	struct ib_device  *device;
	u8                 port_num;
	bool               enforce_security;
};

union ib_gid zgid;
EXPORT_SYMBOL(zgid);

static const struct ib_gid_attr zattr;

enum gid_attr_find_mask {
	GID_ATTR_FIND_MASK_GID      = 1UL << 0,
	GID_ATTR_FIND_MASK_NETDEV   = 1UL << 1,
	GID_ATTR_FIND_MASK_DEFAULT  = 1UL << 2,
	GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3,
};

enum gid_table_entry_props {
	GID_TABLE_ENTRY_INVALID = 1UL << 0,
	GID_TABLE_ENTRY_DEFAULT = 1UL << 1,
};

enum gid_table_write_action {
	GID_TABLE_WRITE_ACTION_ADD,
	GID_TABLE_WRITE_ACTION_DEL,
	/* MODIFY only updates the GID table. Currently only used by
	 * ib_cache_update.
	 */
	GID_TABLE_WRITE_ACTION_MODIFY
};

struct ib_gid_table_entry {
	unsigned long       props;
	union ib_gid        gid;
	struct ib_gid_attr  attr;
	void               *context;
};

struct ib_gid_table {
	int sz;
	/* In RoCE, adding a GID to the table requires:
	 * (a) Find if this GID already exists.
	 * (b) Find a free space.
	 * (c) Write the new GID.
	 *
	 * Delete requires a different set of operations:
	 * (a) Find the GID.
	 * (b) Delete it.
	 *
	 * Add/delete should be carried out atomically.
	 * This is done by locking this mutex from multiple
	 * writers. We don't need this lock for IB, as the MAD
	 * layer replaces all entries. All data_vec entries
	 * are locked by this lock.
	 */
	struct mutex lock;
	/* This lock protects the table entries from being
	 * read and written simultaneously.
	 */
	rwlock_t rwlock;
	struct ib_gid_table_entry *data_vec;
};

static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
{
	if (rdma_cap_roce_gid_table(ib_dev, port)) {
		struct ib_event event;

		event.device           = ib_dev;
		event.element.port_num = port;
		event.event            = IB_EVENT_GID_CHANGE;

		ib_dispatch_event(&event);
	}
}

static const char * const gid_type_str[] = {
	[IB_GID_TYPE_IB]             = "IB/RoCE v1",
	[IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2",
};

const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
{
	if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
		return gid_type_str[gid_type];

	return "Invalid GID type";
}
EXPORT_SYMBOL(ib_cache_gid_type_str);

int ib_cache_gid_parse_type_str(const char *buf)
{
	unsigned int i;
	size_t len;
	int err = -EINVAL;

	len = strlen(buf);
	if (len == 0)
		return -EINVAL;

	if (buf[len - 1] == '\n')
		len--;

	for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
		if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
		    len == strlen(gid_type_str[i])) {
			err = i;
			break;
		}

	return err;
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);

/* This function expects that rwlock will be write locked in all
 * scenarios and that lock will be locked in sleep-able (RoCE)
 * scenarios.
 */
static int write_gid(struct ib_device *ib_dev, u8 port,
		     struct ib_gid_table *table, int ix,
		     const union ib_gid *gid,
		     const struct ib_gid_attr *attr,
		     enum gid_table_write_action action,
		     bool default_gid)
	__releases(&table->rwlock) __acquires(&table->rwlock)
{
	int ret = 0;
	struct net_device *old_net_dev;
	enum ib_gid_type old_gid_type;

	/* In the rdma_cap_roce_gid_table case, this function should be
	 * protected by a sleep-able lock.
	 */

	if (rdma_cap_roce_gid_table(ib_dev, port)) {
		table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
		write_unlock_irq(&table->rwlock);
		/* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
		 * RoCE providers and thus only updates the cache.
		 */
		if (action == GID_TABLE_WRITE_ACTION_ADD)
			ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr,
					      &table->data_vec[ix].context);
		else if (action == GID_TABLE_WRITE_ACTION_DEL)
			ret = ib_dev->del_gid(ib_dev, port, ix,
					      &table->data_vec[ix].context);
		write_lock_irq(&table->rwlock);
	}

	old_net_dev = table->data_vec[ix].attr.ndev;
	old_gid_type = table->data_vec[ix].attr.gid_type;
	if (old_net_dev && old_net_dev != attr->ndev)
		dev_put(old_net_dev);
	/* if modify_gid failed, just delete the old gid */
	if (ret || action == GID_TABLE_WRITE_ACTION_DEL) {
		gid = &zgid;
		attr = &zattr;
		table->data_vec[ix].context = NULL;
	}

	memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
	memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
	if (default_gid) {
		table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
		if (action == GID_TABLE_WRITE_ACTION_DEL)
			table->data_vec[ix].attr.gid_type = old_gid_type;
	}
	if (table->data_vec[ix].attr.ndev &&
	    table->data_vec[ix].attr.ndev != old_net_dev)
		dev_hold(table->data_vec[ix].attr.ndev);

	table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;

	return ret;
}

static int add_gid(struct ib_device *ib_dev, u8 port,
		   struct ib_gid_table *table, int ix,
		   const union ib_gid *gid,
		   const struct ib_gid_attr *attr,
		   bool default_gid)
{
	return write_gid(ib_dev, port, table, ix, gid, attr,
			 GID_TABLE_WRITE_ACTION_ADD, default_gid);
}

static int modify_gid(struct ib_device *ib_dev, u8 port,
		      struct ib_gid_table *table, int ix,
		      const union ib_gid *gid,
		      const struct ib_gid_attr *attr,
		      bool default_gid)
{
	return write_gid(ib_dev, port, table, ix, gid, attr,
			 GID_TABLE_WRITE_ACTION_MODIFY, default_gid);
}

static int del_gid(struct ib_device *ib_dev, u8 port,
		   struct ib_gid_table *table, int ix,
		   bool default_gid)
{
	return write_gid(ib_dev, port, table, ix, &zgid, &zattr,
			 GID_TABLE_WRITE_ACTION_DEL, default_gid);
}

/* rwlock should be read locked */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
		    const struct ib_gid_attr *val, bool default_gid,
		    unsigned long mask, int *pempty)
{
	int i = 0;
	int found = -1;
	int empty = pempty ? -1 : 0;
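
	/* Single pass over the table: remember the first free slot for the
	 * caller (via *pempty) and the first entry that matches every
	 * attribute selected by the search mask.
	 */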
	while (i < table->sz && (found < 0 || empty < 0)) {
		struct ib_gid_table_entry *data = &table->data_vec[i];
		struct ib_gid_attr *attr = &data->attr;
		int curr_index = i;

		i++;

		if (data->props & GID_TABLE_ENTRY_INVALID)
			continue;

		if (empty < 0)
			if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
			    !memcmp(attr, &zattr, sizeof(*attr)) &&
			    !data->props)
				empty = curr_index;

		if (found >= 0)
			continue;

		if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
		    attr->gid_type != val->gid_type)
			continue;

		if (mask & GID_ATTR_FIND_MASK_GID &&
		    memcmp(gid, &data->gid, sizeof(*gid)))
			continue;

		if (mask & GID_ATTR_FIND_MASK_NETDEV &&
		    attr->ndev != val->ndev)
			continue;

		if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
		    !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
		    default_gid)
			continue;

		found = curr_index;
	}

	if (pempty)
		*pempty = empty;

	return found;
}

static void make_default_gid(struct net_device *dev, union ib_gid *gid)
{
	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
	addrconf_ifid_eui48(&gid->raw[8], dev);
}

int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	struct ib_gid_table *table;
	int ix;
	int ret = 0;
	struct net_device *idev;
	int empty;

	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

	if (!memcmp(gid, &zgid, sizeof(*gid)))
		return -EINVAL;

	if (ib_dev->get_netdev) {
		idev = ib_dev->get_netdev(ib_dev, port);
		if (idev && attr->ndev != idev) {
			union ib_gid default_gid;

			/* Adding default GIDs is not permitted */
			make_default_gid(idev, &default_gid);
			if (!memcmp(gid, &default_gid, sizeof(*gid))) {
				dev_put(idev);
				return -EPERM;
			}
		}
		if (idev)
			dev_put(idev);
	}

	mutex_lock(&table->lock);
	write_lock_irq(&table->rwlock);

	ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
		      GID_ATTR_FIND_MASK_GID_TYPE |
		      GID_ATTR_FIND_MASK_NETDEV, &empty);
	if (ix >= 0)
		goto out_unlock;

	if (empty < 0) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
	if (!ret)
		dispatch_gid_change_event(ib_dev, port);

out_unlock:
	write_unlock_irq(&table->rwlock);
	mutex_unlock(&table->lock);
	return ret;
}

int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	struct ib_gid_table *table;
	int ix;

	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

	mutex_lock(&table->lock);
	write_lock_irq(&table->rwlock);

	ix = find_gid(table, gid, attr, false,
		      GID_ATTR_FIND_MASK_GID |
		      GID_ATTR_FIND_MASK_GID_TYPE |
		      GID_ATTR_FIND_MASK_NETDEV |
		      GID_ATTR_FIND_MASK_DEFAULT,
		      NULL);
	if (ix < 0)
		goto out_unlock;

	if (!del_gid(ib_dev, port, table, ix, false))
		dispatch_gid_change_event(ib_dev, port);

out_unlock:
	write_unlock_irq(&table->rwlock);
	mutex_unlock(&table->lock);
	return 0;
}

int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
				     struct net_device *ndev)
{
	struct ib_gid_table *table;
	int ix;
	bool deleted = false;

	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
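
	/* Drop every table entry that references this netdev. Entries that
	 * were default GIDs are deleted as default entries, so the slot keeps
	 * its reserved GID type.
	 */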
	mutex_lock(&table->lock);
	write_lock_irq(&table->rwlock);

	for (ix = 0; ix < table->sz; ix++)
		if (table->data_vec[ix].attr.ndev == ndev)
			if (!del_gid(ib_dev, port, table, ix,
				     !!(table->data_vec[ix].props &
					GID_TABLE_ENTRY_DEFAULT)))
				deleted = true;

	write_unlock_irq(&table->rwlock);
	mutex_unlock(&table->lock);

	if (deleted)
		dispatch_gid_change_event(ib_dev, port);

	return 0;
}

static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
			      union ib_gid *gid, struct ib_gid_attr *attr)
{
	struct ib_gid_table *table;

	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

	if (index < 0 || index >= table->sz)
		return -EINVAL;

	if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
		return -EAGAIN;

	memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
	if (attr) {
		memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
		if (attr->ndev)
			dev_hold(attr->ndev);
	}

	return 0;
}

static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
				    const union ib_gid *gid,
				    const struct ib_gid_attr *val,
				    unsigned long mask,
				    u8 *port, u16 *index)
{
	struct ib_gid_table *table;
	u8 p;
	int local_index;
	unsigned long flags;

	for (p = 0; p < ib_dev->phys_port_cnt; p++) {
		table = ib_dev->cache.ports[p].gid;
		read_lock_irqsave(&table->rwlock, flags);
		local_index = find_gid(table, gid, val, false, mask, NULL);
		if (local_index >= 0) {
			if (index)
				*index = local_index;
			if (port)
				*port = p + rdma_start_port(ib_dev);
			read_unlock_irqrestore(&table->rwlock, flags);
			return 0;
		}
		read_unlock_irqrestore(&table->rwlock, flags);
	}

	return -ENOENT;
}

static int ib_cache_gid_find(struct ib_device *ib_dev,
			     const union ib_gid *gid,
			     enum ib_gid_type gid_type,
			     struct net_device *ndev, u8 *port,
			     u16 *index)
{
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE;
	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};

	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;

	return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
					mask, port, index);
}

int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
			       const union ib_gid *gid,
			       enum ib_gid_type gid_type,
			       u8 port, struct net_device *ndev,
			       u16 *index)
{
	int local_index;
	struct ib_gid_table *table;
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE;
	struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
	unsigned long flags;

	if (!rdma_is_port_valid(ib_dev, port))
		return -ENOENT;

	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;

	read_lock_irqsave(&table->rwlock, flags);
	local_index = find_gid(table, gid, &val, false, mask, NULL);
	if (local_index >= 0) {
		if (index)
			*index = local_index;
		read_unlock_irqrestore(&table->rwlock, flags);
		return 0;
	}

	read_unlock_irqrestore(&table->rwlock, flags);
	return -ENOENT;
}
EXPORT_SYMBOL(ib_find_cached_gid_by_port);

/**
 * ib_find_gid_by_filter - Returns the GID table index where a specified
 * GID value occurs
 * @device: The device to query.
 * @gid: The GID value to search for.
 * @port_num: The port number of the device where the GID value could be
 *   searched.
 * @filter: The filter function is executed on any matching GID in the table.
 *   If the filter function returns true, the corresponding index is returned,
 *   otherwise, we continue searching the GID table. It's guaranteed that
 *   while filter is executed, ndev field is valid and the structure won't
 *   change. filter is executed in an atomic context. filter must not be NULL.
 * @index: The index into the cached GID table where the GID was found. This
 *   parameter may be NULL.
 *
 * ib_cache_gid_find_by_filter() searches for the specified GID value
 * of which the filter function returns true in the port's GID table.
 * This function is only supported on RoCE ports.
 */
static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
				       const union ib_gid *gid,
				       u8 port,
				       bool (*filter)(const union ib_gid *,
						      const struct ib_gid_attr *,
						      void *),
				       void *context,
				       u16 *index)
{
	struct ib_gid_table *table;
	unsigned int i;
	unsigned long flags;
	bool found = false;

	if (!rdma_is_port_valid(ib_dev, port) ||
	    !rdma_protocol_roce(ib_dev, port))
		return -EPROTONOSUPPORT;

	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

	read_lock_irqsave(&table->rwlock, flags);
	for (i = 0; i < table->sz; i++) {
		struct ib_gid_attr attr;

		if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
			goto next;

		if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
			goto next;

		memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));

		if (filter(gid, &attr, context))
			found = true;

next:
		if (found)
			break;
	}
	read_unlock_irqrestore(&table->rwlock, flags);

	if (!found)
		return -ENOENT;

	if (index)
		*index = i;
	return 0;
}

static struct ib_gid_table *alloc_gid_table(int sz)
{
	struct ib_gid_table *table =
		kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);

	if (!table)
		return NULL;

	table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
	if (!table->data_vec)
		goto err_free_table;

	mutex_init(&table->lock);

	table->sz = sz;
	rwlock_init(&table->rwlock);

	return table;

err_free_table:
	kfree(table);
	return NULL;
}

static void release_gid_table(struct ib_gid_table *table)
{
	if (table) {
		kfree(table->data_vec);
		kfree(table);
	}
}

static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
				   struct ib_gid_table *table)
{
	int i;
	bool deleted = false;

	if (!table)
		return;

	write_lock_irq(&table->rwlock);
	for (i = 0; i < table->sz; ++i) {
		if (memcmp(&table->data_vec[i].gid, &zgid,
			   sizeof(table->data_vec[i].gid)))
			if (!del_gid(ib_dev, port, table, i,
				     !!(table->data_vec[i].props &
					GID_TABLE_ENTRY_DEFAULT)))
				deleted = true;
	}
	write_unlock_irq(&table->rwlock);

	if (deleted)
		dispatch_gid_change_event(ib_dev, port);
}
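
/* Set or clear the reserved default GID entries for @ndev on @port, one
 * entry per GID type enabled in @gid_type_mask. The default GID itself is
 * the link-local GID derived from the netdev's hardware address.
 */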
void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
				  struct net_device *ndev,
				  unsigned long gid_type_mask,
				  enum ib_cache_gid_default_mode mode)
{
	union ib_gid gid;
	struct ib_gid_attr gid_attr;
	struct ib_gid_attr zattr_type = zattr;
	struct ib_gid_table *table;
	unsigned int gid_type;

	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

	make_default_gid(ndev, &gid);
	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
		int ix;
		union ib_gid current_gid;
		struct ib_gid_attr current_gid_attr = {};

		if (1UL << gid_type & ~gid_type_mask)
			continue;

		gid_attr.gid_type = gid_type;

		mutex_lock(&table->lock);
		write_lock_irq(&table->rwlock);
		ix = find_gid(table, NULL, &gid_attr, true,
			      GID_ATTR_FIND_MASK_GID_TYPE |
			      GID_ATTR_FIND_MASK_DEFAULT,
			      NULL);

		/* Couldn't find default GID location */
		if (WARN_ON(ix < 0))
			goto release;

		zattr_type.gid_type = gid_type;

		if (!__ib_cache_gid_get(ib_dev, port, ix,
					&current_gid, &current_gid_attr) &&
		    mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
		    !memcmp(&gid, &current_gid, sizeof(gid)) &&
		    !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
			goto release;

		if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
		    memcmp(&current_gid_attr, &zattr_type,
			   sizeof(current_gid_attr))) {
			if (del_gid(ib_dev, port, table, ix, true)) {
				pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
					ix, gid.raw);
				goto release;
			} else {
				dispatch_gid_change_event(ib_dev, port);
			}
		}

		if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
			if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
				pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
					gid.raw);
			else
				dispatch_gid_change_event(ib_dev, port);
		}

release:
		if (current_gid_attr.ndev)
			dev_put(current_gid_attr.ndev);
		write_unlock_irq(&table->rwlock);
		mutex_unlock(&table->lock);
	}
}

static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
				     struct ib_gid_table *table)
{
	unsigned int i;
	unsigned long roce_gid_type_mask;
	unsigned int num_default_gids;
	unsigned int current_gid = 0;

	roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	num_default_gids = hweight_long(roce_gid_type_mask);
	for (i = 0; i < num_default_gids && i < table->sz; i++) {
		struct ib_gid_table_entry *entry =
			&table->data_vec[i];

		entry->props |= GID_TABLE_ENTRY_DEFAULT;
		current_gid = find_next_bit(&roce_gid_type_mask,
					    BITS_PER_LONG,
					    current_gid);
		entry->attr.gid_type = current_gid++;
	}

	return 0;
}

static int _gid_table_setup_one(struct ib_device *ib_dev)
{
	u8 port;
	struct ib_gid_table *table;
	int err = 0;

	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		u8 rdma_port = port + rdma_start_port(ib_dev);

		table =
			alloc_gid_table(
				ib_dev->port_immutable[rdma_port].gid_tbl_len);
		if (!table) {
			err = -ENOMEM;
			goto rollback_table_setup;
		}

		err = gid_table_reserve_default(ib_dev,
						port + rdma_start_port(ib_dev),
						table);
		if (err)
			goto rollback_table_setup;
		ib_dev->cache.ports[port].gid = table;
	}

	return 0;

rollback_table_setup:
	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		table = ib_dev->cache.ports[port].gid;

		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
				       table);
		release_gid_table(table);
	}

	return err;
}
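
/* gid_table_release_one() only frees the per-port tables;
 * gid_table_cleanup_one() further below also deletes the entries first, which
 * lets RoCE providers drop the corresponding hardware GID state.
 */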
static void gid_table_release_one(struct ib_device *ib_dev)
{
	struct ib_gid_table *table;
	u8 port;

	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		table = ib_dev->cache.ports[port].gid;
		release_gid_table(table);
		ib_dev->cache.ports[port].gid = NULL;
	}
}

static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
	struct ib_gid_table *table;
	u8 port;

	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		table = ib_dev->cache.ports[port].gid;
		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
				       table);
	}
}

static int gid_table_setup_one(struct ib_device *ib_dev)
{
	int err;

	err = _gid_table_setup_one(ib_dev);

	if (err)
		return err;

	err = roce_rescan_device(ib_dev);

	if (err) {
		gid_table_cleanup_one(ib_dev);
		gid_table_release_one(ib_dev);
	}

	return err;
}

int ib_get_cached_gid(struct ib_device *device,
		      u8 port_num,
		      int index,
		      union ib_gid *gid,
		      struct ib_gid_attr *gid_attr)
{
	int res;
	unsigned long flags;
	struct ib_gid_table *table;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	table = device->cache.ports[port_num - rdma_start_port(device)].gid;
	read_lock_irqsave(&table->rwlock, flags);
	res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
	read_unlock_irqrestore(&table->rwlock, flags);

	return res;
}
EXPORT_SYMBOL(ib_get_cached_gid);

int ib_find_cached_gid(struct ib_device *device,
		       const union ib_gid *gid,
		       enum ib_gid_type gid_type,
		       struct net_device *ndev,
		       u8 *port_num,
		       u16 *index)
{
	return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
}
EXPORT_SYMBOL(ib_find_cached_gid);

int ib_find_gid_by_filter(struct ib_device *device,
			  const union ib_gid *gid,
			  u8 port_num,
			  bool (*filter)(const union ib_gid *gid,
					 const struct ib_gid_attr *,
					 void *),
			  void *context, u16 *index)
{
	/* Only RoCE GID table supports filter function */
	if (!rdma_cap_roce_gid_table(device, port_num) && filter)
		return -EPROTONOSUPPORT;

	return ib_cache_gid_find_by_filter(device, gid,
					   port_num, filter,
					   context, index);
}
EXPORT_SYMBOL(ib_find_gid_by_filter);

int ib_get_cached_pkey(struct ib_device *device,
		       u8 port_num,
		       int index,
		       u16 *pkey)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int ret = 0;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

	if (index < 0 || index >= cache->table_len)
		ret = -EINVAL;
	else
		*pkey = cache->table[index];

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_pkey);

int ib_get_cached_subnet_prefix(struct ib_device *device,
				u8 port_num,
				u64 *sn_pfx)
{
	unsigned long flags;
	int p;

	if (port_num < rdma_start_port(device) ||
	    port_num > rdma_end_port(device))
		return -EINVAL;

	p = port_num - rdma_start_port(device);
	read_lock_irqsave(&device->cache.lock, flags);
	*sn_pfx = device->cache.ports[p].subnet_prefix;
	read_unlock_irqrestore(&device->cache.lock, flags);

	return 0;
}
EXPORT_SYMBOL(ib_get_cached_subnet_prefix);
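
/* ib_find_cached_pkey() prefers an entry that also has the full-membership
 * bit (0x8000) set; if only the low 15 bits match, the first such partial
 * match is returned instead.
 */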
int ib_find_cached_pkey(struct ib_device *device,
			u8 port_num,
			u16 pkey,
			u16 *index)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int i;
	int ret = -ENOENT;
	int partial_ix = -1;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

	*index = -1;

	for (i = 0; i < cache->table_len; ++i)
		if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
			if (cache->table[i] & 0x8000) {
				*index = i;
				ret = 0;
				break;
			} else
				partial_ix = i;
		}

	if (ret && partial_ix >= 0) {
		*index = partial_ix;
		ret = 0;
	}

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);

int ib_find_exact_cached_pkey(struct ib_device *device,
			      u8 port_num,
			      u16 pkey,
			      u16 *index)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int i;
	int ret = -ENOENT;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

	*index = -1;

	for (i = 0; i < cache->table_len; ++i)
		if (cache->table[i] == pkey) {
			*index = i;
			ret = 0;
			break;
		}

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_find_exact_cached_pkey);

int ib_get_cached_lmc(struct ib_device *device,
		      u8 port_num,
		      u8 *lmc)
{
	unsigned long flags;
	int ret = 0;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);
	*lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);

int ib_get_cached_port_state(struct ib_device *device,
			     u8 port_num,
			     enum ib_port_state *port_state)
{
	unsigned long flags;
	int ret = 0;

	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);
	*port_state = device->cache.ports[port_num
		- rdma_start_port(device)].port_state;
	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_port_state);

static void ib_cache_update(struct ib_device *device,
			    u8 port,
			    bool enforce_security)
{
	struct ib_port_attr *tprops = NULL;
	struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
	struct ib_gid_cache {
		int          table_len;
		union ib_gid table[0];
	} *gid_cache = NULL;
	int i;
	int ret;
	struct ib_gid_table *table;
	bool use_roce_gid_table =
		rdma_cap_roce_gid_table(device, port);

	if (!rdma_is_port_valid(device, port))
		return;

	table = device->cache.ports[port - rdma_start_port(device)].gid;

	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
	if (!tprops)
		return;

	ret = ib_query_port(device, port, tprops);
	if (ret) {
		pr_warn("ib_query_port failed (%d) for %s\n",
			ret, device->name);
		goto err;
	}

	pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
			     sizeof *pkey_cache->table, GFP_KERNEL);
	if (!pkey_cache)
		goto err;

	pkey_cache->table_len = tprops->pkey_tbl_len;
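
	/* For roce_gid_table providers the GID cache is kept up to date
	 * through the add_gid/del_gid hooks, so only legacy (IB) ports
	 * snapshot and rewrite the GID table here.
	 */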
	if (!use_roce_gid_table) {
		gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len *
				    sizeof(*gid_cache->table), GFP_KERNEL);
		if (!gid_cache)
			goto err;

		gid_cache->table_len = tprops->gid_tbl_len;
	}

	for (i = 0; i < pkey_cache->table_len; ++i) {
		ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
		if (ret) {
			pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
				ret, device->name, i);
			goto err;
		}
	}

	if (!use_roce_gid_table) {
		for (i = 0; i < gid_cache->table_len; ++i) {
			ret = ib_query_gid(device, port, i,
					   gid_cache->table + i, NULL);
			if (ret) {
				pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
					ret, device->name, i);
				goto err;
			}
		}
	}

	write_lock_irq(&device->cache.lock);

	old_pkey_cache = device->cache.ports[port -
					     rdma_start_port(device)].pkey;

	device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
	if (!use_roce_gid_table) {
		write_lock(&table->rwlock);
		for (i = 0; i < gid_cache->table_len; i++) {
			modify_gid(device, port, table, i, gid_cache->table + i,
				   &zattr, false);
		}
		write_unlock(&table->rwlock);
	}

	device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
	device->cache.ports[port - rdma_start_port(device)].port_state =
		tprops->state;

	device->cache.ports[port - rdma_start_port(device)].subnet_prefix =
		tprops->subnet_prefix;
	write_unlock_irq(&device->cache.lock);

	if (enforce_security)
		ib_security_cache_change(device,
					 port,
					 tprops->subnet_prefix);

	kfree(gid_cache);
	kfree(old_pkey_cache);
	kfree(tprops);
	return;

err:
	kfree(pkey_cache);
	kfree(gid_cache);
	kfree(tprops);
}

static void ib_cache_task(struct work_struct *_work)
{
	struct ib_update_work *work =
		container_of(_work, struct ib_update_work, work);

	ib_cache_update(work->device,
			work->port_num,
			work->enforce_security);
	kfree(work);
}

static void ib_cache_event(struct ib_event_handler *handler,
			   struct ib_event *event)
{
	struct ib_update_work *work;

	if (event->event == IB_EVENT_PORT_ERR    ||
	    event->event == IB_EVENT_PORT_ACTIVE ||
	    event->event == IB_EVENT_LID_CHANGE  ||
	    event->event == IB_EVENT_PKEY_CHANGE ||
	    event->event == IB_EVENT_SM_CHANGE   ||
	    event->event == IB_EVENT_CLIENT_REREGISTER ||
	    event->event == IB_EVENT_GID_CHANGE) {
		work = kmalloc(sizeof *work, GFP_ATOMIC);
		if (work) {
			INIT_WORK(&work->work, ib_cache_task);
			work->device   = event->device;
			work->port_num = event->element.port_num;
			if (event->event == IB_EVENT_PKEY_CHANGE ||
			    event->event == IB_EVENT_GID_CHANGE)
				work->enforce_security = true;
			else
				work->enforce_security = false;

			queue_work(ib_wq, &work->work);
		}
	}
}

int ib_cache_setup_one(struct ib_device *device)
{
	int p;
	int err;

	rwlock_init(&device->cache.lock);

	device->cache.ports =
		kzalloc(sizeof(*device->cache.ports) *
			(rdma_end_port(device) - rdma_start_port(device) + 1),
			GFP_KERNEL);
	if (!device->cache.ports) {
		err = -ENOMEM;
		goto out;
	}

	err = gid_table_setup_one(device);
	if (err)
		goto out;

	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
		ib_cache_update(device, p + rdma_start_port(device), true);
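
	/* Keep the cache in sync from now on: ib_cache_event() queues an
	 * update on port state, LID, PKEY, SM and GID change events.
	 */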
	INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
			      device, ib_cache_event);
	err = ib_register_event_handler(&device->cache.event_handler);
	if (err)
		goto err;

	return 0;

err:
	gid_table_cleanup_one(device);
out:
	return err;
}

void ib_cache_release_one(struct ib_device *device)
{
	int p;

	/*
	 * The release function frees all the cache elements.
	 * This function should be called as part of freeing
	 * all the device's resources when the cache could no
	 * longer be accessed.
	 */
	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
		kfree(device->cache.ports[p].pkey);

	gid_table_release_one(device);
	kfree(device->cache.ports);
}

void ib_cache_cleanup_one(struct ib_device *device)
{
	/* The cleanup function unregisters the event handler,
	 * waits for all in-progress workqueue elements and cleans
	 * up the GID cache. This function should be called after
	 * the device was removed from the devices list and all
	 * clients were removed, so the cache exists but is
	 * non-functional and shouldn't be updated anymore.
	 */
	ib_unregister_event_handler(&device->cache.event_handler);
	flush_workqueue(ib_wq);
	gid_table_cleanup_one(device);
}

void __init ib_cache_setup(void)
{
	roce_gid_mgmt_init();
}

void __exit ib_cache_cleanup(void)
{
	roce_gid_mgmt_cleanup();
}