/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Intel Corporation. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <net/addrconf.h>

#include <rdma/ib_cache.h>

#include "core_priv.h"

struct ib_pkey_cache {
	int		table_len;
	u16		table[0];
};

struct ib_update_work {
	struct work_struct work;
	struct ib_device  *device;
	u8		   port_num;
	bool		   enforce_security;
};

union ib_gid zgid;
EXPORT_SYMBOL(zgid);

static const struct ib_gid_attr zattr;

enum gid_attr_find_mask {
	GID_ATTR_FIND_MASK_GID		= 1UL << 0,
	GID_ATTR_FIND_MASK_NETDEV	= 1UL << 1,
	GID_ATTR_FIND_MASK_DEFAULT	= 1UL << 2,
	GID_ATTR_FIND_MASK_GID_TYPE	= 1UL << 3,
};

enum gid_table_entry_props {
	GID_TABLE_ENTRY_INVALID		= 1UL << 0,
	GID_TABLE_ENTRY_DEFAULT		= 1UL << 1,
};

enum gid_table_write_action {
	GID_TABLE_WRITE_ACTION_ADD,
	GID_TABLE_WRITE_ACTION_DEL,
	/* MODIFY only updates the GID table. Currently only used by
	 * ib_cache_update.
	 */
	GID_TABLE_WRITE_ACTION_MODIFY
};

struct ib_gid_table_entry {
	unsigned long	    props;
	union ib_gid	    gid;
	struct ib_gid_attr  attr;
	void		   *context;
};

struct ib_gid_table {
	int		     sz;
	/* In RoCE, adding a GID to the table requires:
	 * (a) Find whether this GID already exists.
	 * (b) Find a free space.
	 * (c) Write the new GID.
	 *
	 * Delete requires a different set of operations:
	 * (a) Find the GID.
	 * (b) Delete it.
	 *
	 * Add/delete should be carried out atomically.
	 * This is done by having all writers take this mutex.
	 * We don't need this lock for IB, as the MAD layer
	 * replaces all entries. All data_vec entries are
	 * protected by this lock.
	 */
	struct mutex	     lock;
	/* This lock protects the table entries from being
	 * read and written simultaneously.
	 */
	rwlock_t	     rwlock;
	struct ib_gid_table_entry *data_vec;
};
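
/*
 * Writer-side locking pattern, summarizing what the functions below do
 * (not an additional requirement): a writer takes table->lock and then
 * table->rwlock around the lookup and the entry update, e.g.
 *
 *	mutex_lock(&table->lock);
 *	write_lock_irq(&table->rwlock);
 *	... find_gid() / write_gid() ...
 *	write_unlock_irq(&table->rwlock);
 *	mutex_unlock(&table->lock);
 *
 * Readers only take table->rwlock (read_lock_irqsave()).
 */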

static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
{
	if (rdma_cap_roce_gid_table(ib_dev, port)) {
		struct ib_event event;

		event.device		= ib_dev;
		event.element.port_num	= port;
		event.event		= IB_EVENT_GID_CHANGE;

		ib_dispatch_event(&event);
	}
}

static const char * const gid_type_str[] = {
	[IB_GID_TYPE_IB]		= "IB/RoCE v1",
	[IB_GID_TYPE_ROCE_UDP_ENCAP]	= "RoCE v2",
};

const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
{
	if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
		return gid_type_str[gid_type];

	return "Invalid GID type";
}
EXPORT_SYMBOL(ib_cache_gid_type_str);

int ib_cache_gid_parse_type_str(const char *buf)
{
	unsigned int i;
	size_t len;
	int err = -EINVAL;

	len = strlen(buf);
	if (len == 0)
		return -EINVAL;

	if (buf[len - 1] == '\n')
		len--;

	for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
		if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
		    len == strlen(gid_type_str[i])) {
			err = i;
			break;
		}

	return err;
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);

/* This function expects that rwlock will be write locked in all
 * scenarios and that lock will be locked in sleepable (RoCE)
 * scenarios.
 */
static int write_gid(struct ib_device *ib_dev, u8 port,
		     struct ib_gid_table *table, int ix,
		     const union ib_gid *gid,
		     const struct ib_gid_attr *attr,
		     enum gid_table_write_action action,
		     bool default_gid)
	__releases(&table->rwlock) __acquires(&table->rwlock)
{
	int ret = 0;
	struct net_device *old_net_dev;
	enum ib_gid_type old_gid_type;

	/* In the rdma_cap_roce_gid_table() case, this function should be
	 * protected by a sleepable lock.
	 */

	if (rdma_cap_roce_gid_table(ib_dev, port)) {
		table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
		write_unlock_irq(&table->rwlock);
		/* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
		 * RoCE providers and thus only updates the cache.
		 */
		if (action == GID_TABLE_WRITE_ACTION_ADD)
			ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr,
					      &table->data_vec[ix].context);
		else if (action == GID_TABLE_WRITE_ACTION_DEL)
			ret = ib_dev->del_gid(ib_dev, port, ix,
					      &table->data_vec[ix].context);
		write_lock_irq(&table->rwlock);
	}

	old_net_dev = table->data_vec[ix].attr.ndev;
	old_gid_type = table->data_vec[ix].attr.gid_type;
	if (old_net_dev && old_net_dev != attr->ndev)
		dev_put(old_net_dev);
	/* If the provider call failed, or this is a delete, store the zero GID */
	if (ret || action == GID_TABLE_WRITE_ACTION_DEL) {
		gid = &zgid;
		attr = &zattr;
		table->data_vec[ix].context = NULL;
	}

	memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
	memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
	if (default_gid) {
		table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
		if (action == GID_TABLE_WRITE_ACTION_DEL)
			table->data_vec[ix].attr.gid_type = old_gid_type;
	}
	if (table->data_vec[ix].attr.ndev &&
	    table->data_vec[ix].attr.ndev != old_net_dev)
		dev_hold(table->data_vec[ix].attr.ndev);

	table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;

	return ret;
}

static int add_gid(struct ib_device *ib_dev, u8 port,
		   struct ib_gid_table *table, int ix,
		   const union ib_gid *gid,
		   const struct ib_gid_attr *attr,
		   bool default_gid)
{
	return write_gid(ib_dev, port, table, ix, gid, attr,
			 GID_TABLE_WRITE_ACTION_ADD, default_gid);
}

static int modify_gid(struct ib_device *ib_dev, u8 port,
		      struct ib_gid_table *table, int ix,
		      const union ib_gid *gid,
		      const struct ib_gid_attr *attr,
		      bool default_gid)
{
	return write_gid(ib_dev, port, table, ix, gid, attr,
			 GID_TABLE_WRITE_ACTION_MODIFY, default_gid);
}

static int del_gid(struct ib_device *ib_dev, u8 port,
		   struct ib_gid_table *table, int ix,
		   bool default_gid)
{
	return write_gid(ib_dev, port, table, ix, &zgid, &zattr,
			 GID_TABLE_WRITE_ACTION_DEL, default_gid);
}
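
/*
 * find_gid() scans the table for the first entry matching @gid/@val under
 * @mask (skipping entries marked GID_TABLE_ENTRY_INVALID) and returns its
 * index, or -1 if no entry matches.  When @pempty is non-NULL, it also
 * reports the first completely free slot through *pempty (-1 if the table
 * is full), so callers such as ib_cache_gid_add() can do "find or insert"
 * in a single pass.
 */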

/* rwlock should be read locked */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
		    const struct ib_gid_attr *val, bool default_gid,
		    unsigned long mask, int *pempty)
{
	int i = 0;
	int found = -1;
	int empty = pempty ? -1 : 0;

	while (i < table->sz && (found < 0 || empty < 0)) {
		struct ib_gid_table_entry *data = &table->data_vec[i];
		struct ib_gid_attr *attr = &data->attr;
		int curr_index = i;

		i++;

		if (data->props & GID_TABLE_ENTRY_INVALID)
			continue;

		if (empty < 0)
			if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
			    !memcmp(attr, &zattr, sizeof(*attr)) &&
			    !data->props)
				empty = curr_index;

		if (found >= 0)
			continue;

		if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
		    attr->gid_type != val->gid_type)
			continue;

		if (mask & GID_ATTR_FIND_MASK_GID &&
		    memcmp(gid, &data->gid, sizeof(*gid)))
			continue;

		if (mask & GID_ATTR_FIND_MASK_NETDEV &&
		    attr->ndev != val->ndev)
			continue;

		if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
		    !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
		    default_gid)
			continue;

		found = curr_index;
	}

	if (pempty)
		*pempty = empty;

	return found;
}

static void make_default_gid(struct net_device *dev, union ib_gid *gid)
{
	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
	addrconf_ifid_eui48(&gid->raw[8], dev);
}

int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	struct ib_gid_table *table;
	int ix;
	int ret = 0;
	struct net_device *idev;
	int empty;

	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

	if (!memcmp(gid, &zgid, sizeof(*gid)))
		return -EINVAL;

	if (ib_dev->get_netdev) {
		idev = ib_dev->get_netdev(ib_dev, port);
		if (idev && attr->ndev != idev) {
			union ib_gid default_gid;

			/* Adding default GIDs is not permitted */
			make_default_gid(idev, &default_gid);
			if (!memcmp(gid, &default_gid, sizeof(*gid))) {
				dev_put(idev);
				return -EPERM;
			}
		}
		if (idev)
			dev_put(idev);
	}

	mutex_lock(&table->lock);
	write_lock_irq(&table->rwlock);

	ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
		      GID_ATTR_FIND_MASK_GID_TYPE |
		      GID_ATTR_FIND_MASK_NETDEV, &empty);
	if (ix >= 0)
		goto out_unlock;

	if (empty < 0) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
	if (!ret)
		dispatch_gid_change_event(ib_dev, port);

out_unlock:
	write_unlock_irq(&table->rwlock);
	mutex_unlock(&table->lock);
	return ret;
}

int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	struct ib_gid_table *table;
	int ix;

	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

	mutex_lock(&table->lock);
	write_lock_irq(&table->rwlock);

	ix = find_gid(table, gid, attr, false,
		      GID_ATTR_FIND_MASK_GID	  |
		      GID_ATTR_FIND_MASK_GID_TYPE |
		      GID_ATTR_FIND_MASK_NETDEV	  |
		      GID_ATTR_FIND_MASK_DEFAULT,
		      NULL);
	if (ix < 0)
		goto out_unlock;

	if (!del_gid(ib_dev, port, table, ix, false))
		dispatch_gid_change_event(ib_dev, port);

out_unlock:
	write_unlock_irq(&table->rwlock);
	mutex_unlock(&table->lock);
	return 0;
}
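
/*
 * Illustrative caller sketch (not part of this file): the RoCE GID
 * management code reacts to netdev/address events roughly like this,
 * where "ndev" and "ip_based_gid" are placeholders for values derived
 * from the event:
 *
 *	struct ib_gid_attr attr = {
 *		.ndev		= ndev,
 *		.gid_type	= IB_GID_TYPE_ROCE_UDP_ENCAP,
 *	};
 *
 *	ib_cache_gid_add(ib_dev, port, &ip_based_gid, &attr);
 *	...
 *	ib_cache_gid_del(ib_dev, port, &ip_based_gid, &attr);
 *
 * Both calls take table->lock internally, so they must be made from
 * sleepable (process) context.
 */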

int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
				     struct net_device *ndev)
{
	struct ib_gid_table *table;
	int ix;
	bool deleted = false;

	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

	mutex_lock(&table->lock);
	write_lock_irq(&table->rwlock);

	for (ix = 0; ix < table->sz; ix++)
		if (table->data_vec[ix].attr.ndev == ndev)
			if (!del_gid(ib_dev, port, table, ix,
				     !!(table->data_vec[ix].props &
					GID_TABLE_ENTRY_DEFAULT)))
				deleted = true;

	write_unlock_irq(&table->rwlock);
	mutex_unlock(&table->lock);

	if (deleted)
		dispatch_gid_change_event(ib_dev, port);

	return 0;
}

static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
			      union ib_gid *gid, struct ib_gid_attr *attr)
{
	struct ib_gid_table *table;

	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

	if (index < 0 || index >= table->sz)
		return -EINVAL;

	if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
		return -EAGAIN;

	memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
	if (attr) {
		memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
		if (attr->ndev)
			dev_hold(attr->ndev);
	}

	return 0;
}

static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
				    const union ib_gid *gid,
				    const struct ib_gid_attr *val,
				    unsigned long mask,
				    u8 *port, u16 *index)
{
	struct ib_gid_table *table;
	u8 p;
	int local_index;
	unsigned long flags;

	for (p = 0; p < ib_dev->phys_port_cnt; p++) {
		table = ib_dev->cache.ports[p].gid;
		read_lock_irqsave(&table->rwlock, flags);
		local_index = find_gid(table, gid, val, false, mask, NULL);
		if (local_index >= 0) {
			if (index)
				*index = local_index;
			if (port)
				*port = p + rdma_start_port(ib_dev);
			read_unlock_irqrestore(&table->rwlock, flags);
			return 0;
		}
		read_unlock_irqrestore(&table->rwlock, flags);
	}

	return -ENOENT;
}

static int ib_cache_gid_find(struct ib_device *ib_dev,
			     const union ib_gid *gid,
			     enum ib_gid_type gid_type,
			     struct net_device *ndev, u8 *port,
			     u16 *index)
{
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE;
	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};

	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;

	return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
					mask, port, index);
}

int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
			       const union ib_gid *gid,
			       enum ib_gid_type gid_type,
			       u8 port, struct net_device *ndev,
			       u16 *index)
{
	int local_index;
	struct ib_gid_table *table;
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE;
	struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
	unsigned long flags;

	if (!rdma_is_port_valid(ib_dev, port))
		return -ENOENT;

	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;

	read_lock_irqsave(&table->rwlock, flags);
	local_index = find_gid(table, gid, &val, false, mask, NULL);
	if (local_index >= 0) {
		if (index)
			*index = local_index;
		read_unlock_irqrestore(&table->rwlock, flags);
		return 0;
	}

	read_unlock_irqrestore(&table->rwlock, flags);
	return -ENOENT;
}
EXPORT_SYMBOL(ib_find_cached_gid_by_port);

/**
 * ib_cache_gid_find_by_filter - Returns the GID table index where a specified
 * GID value occurs
 * @ib_dev: The device to query.
 * @gid: The GID value to search for.
 * @port: The port number of the device where the GID value is to be searched.
 * @filter: The filter function is executed on any matching GID in the table.
 * If the filter function returns true, the corresponding index is returned,
 * otherwise, we continue searching the GID table. It's guaranteed that
 * while filter is executed, ndev field is valid and the structure won't
 * change. filter is executed in an atomic context. filter must not be NULL.
 * @context: The context passed to the filter function.
 * @index: The index into the cached GID table where the GID was found. This
 * parameter may be NULL.
 *
 * ib_cache_gid_find_by_filter() searches for the specified GID value
 * for which the filter function returns true in the port's GID table.
 * This function is only supported on RoCE ports.
 *
 */
static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
				       const union ib_gid *gid,
				       u8 port,
				       bool (*filter)(const union ib_gid *,
						      const struct ib_gid_attr *,
						      void *),
				       void *context,
				       u16 *index)
{
	struct ib_gid_table *table;
	unsigned int i;
	unsigned long flags;
	bool found = false;

	if (!rdma_is_port_valid(ib_dev, port) ||
	    !rdma_protocol_roce(ib_dev, port))
		return -EPROTONOSUPPORT;

	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

	read_lock_irqsave(&table->rwlock, flags);
	for (i = 0; i < table->sz; i++) {
		struct ib_gid_attr attr;

		if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
			continue;

		if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
			continue;

		memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));

		if (filter(gid, &attr, context)) {
			found = true;
			if (index)
				*index = i;
			break;
		}
	}
	read_unlock_irqrestore(&table->rwlock, flags);

	if (!found)
		return -ENOENT;
	return 0;
}
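
/*
 * Illustrative filter for ib_find_gid_by_filter() (hypothetical, not part
 * of this file): match only entries associated with a given net_device.
 * Filters run under the table rwlock (atomic context), so they must not
 * sleep and must not keep references that outlive the call:
 *
 *	static bool gid_matches_ndev(const union ib_gid *gid,
 *				     const struct ib_gid_attr *attr,
 *				     void *context)
 *	{
 *		return attr->ndev == context;
 *	}
 */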

static struct ib_gid_table *alloc_gid_table(int sz)
{
	struct ib_gid_table *table =
		kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);

	if (!table)
		return NULL;

	table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
	if (!table->data_vec)
		goto err_free_table;

	mutex_init(&table->lock);

	table->sz = sz;
	rwlock_init(&table->rwlock);

	return table;

err_free_table:
	kfree(table);
	return NULL;
}

static void release_gid_table(struct ib_gid_table *table)
{
	if (table) {
		kfree(table->data_vec);
		kfree(table);
	}
}

static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
				   struct ib_gid_table *table)
{
	int i;
	bool deleted = false;

	if (!table)
		return;

	write_lock_irq(&table->rwlock);
	for (i = 0; i < table->sz; ++i) {
		if (memcmp(&table->data_vec[i].gid, &zgid,
			   sizeof(table->data_vec[i].gid)))
			if (!del_gid(ib_dev, port, table, i,
				     !!(table->data_vec[i].props &
					GID_TABLE_ENTRY_DEFAULT)))
				deleted = true;
	}
	write_unlock_irq(&table->rwlock);

	if (deleted)
		dispatch_gid_change_event(ib_dev, port);
}
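
/*
 * Default GIDs: make_default_gid() above builds the link-local GID
 * (fe80::/64 prefix plus an interface identifier derived from the
 * netdev's MAC address via addrconf_ifid_eui48()).
 * ib_cache_gid_set_default_gid() below installs or clears one such
 * default entry per RoCE GID type enabled in @gid_type_mask, reusing
 * the slots reserved by gid_table_reserve_default().
 */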

void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
				  struct net_device *ndev,
				  unsigned long gid_type_mask,
				  enum ib_cache_gid_default_mode mode)
{
	union ib_gid gid;
	struct ib_gid_attr gid_attr;
	struct ib_gid_attr zattr_type = zattr;
	struct ib_gid_table *table;
	unsigned int gid_type;

	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

	make_default_gid(ndev, &gid);
	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
		int ix;
		union ib_gid current_gid;
		struct ib_gid_attr current_gid_attr = {};

		if (1UL << gid_type & ~gid_type_mask)
			continue;

		gid_attr.gid_type = gid_type;

		mutex_lock(&table->lock);
		write_lock_irq(&table->rwlock);
		ix = find_gid(table, NULL, &gid_attr, true,
			      GID_ATTR_FIND_MASK_GID_TYPE |
			      GID_ATTR_FIND_MASK_DEFAULT,
			      NULL);

		/* Couldn't find the default GID location */
		if (WARN_ON(ix < 0))
			goto release;

		zattr_type.gid_type = gid_type;

		if (!__ib_cache_gid_get(ib_dev, port, ix,
					&current_gid, &current_gid_attr) &&
		    mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
		    !memcmp(&gid, &current_gid, sizeof(gid)) &&
		    !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
			goto release;

		if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
		    memcmp(&current_gid_attr, &zattr_type,
			   sizeof(current_gid_attr))) {
			if (del_gid(ib_dev, port, table, ix, true)) {
				pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
					ix, gid.raw);
				goto release;
			} else {
				dispatch_gid_change_event(ib_dev, port);
			}
		}

		if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
			if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
				pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
					gid.raw);
			else
				dispatch_gid_change_event(ib_dev, port);
		}

release:
		if (current_gid_attr.ndev)
			dev_put(current_gid_attr.ndev);
		write_unlock_irq(&table->rwlock);
		mutex_unlock(&table->lock);
	}
}

static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
				     struct ib_gid_table *table)
{
	unsigned int i;
	unsigned long roce_gid_type_mask;
	unsigned int num_default_gids;
	unsigned int current_gid = 0;

	roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	num_default_gids = hweight_long(roce_gid_type_mask);
	for (i = 0; i < num_default_gids && i < table->sz; i++) {
		struct ib_gid_table_entry *entry =
			&table->data_vec[i];

		entry->props |= GID_TABLE_ENTRY_DEFAULT;
		current_gid = find_next_bit(&roce_gid_type_mask,
					    BITS_PER_LONG,
					    current_gid);
		entry->attr.gid_type = current_gid++;
	}

	return 0;
}

static int _gid_table_setup_one(struct ib_device *ib_dev)
{
	u8 port;
	struct ib_gid_table *table;
	int err = 0;

	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		u8 rdma_port = port + rdma_start_port(ib_dev);

		table =
			alloc_gid_table(
				ib_dev->port_immutable[rdma_port].gid_tbl_len);
		if (!table) {
			err = -ENOMEM;
			goto rollback_table_setup;
		}

		err = gid_table_reserve_default(ib_dev,
						port + rdma_start_port(ib_dev),
						table);
		if (err)
			goto rollback_table_setup;
		ib_dev->cache.ports[port].gid = table;
	}

	return 0;

rollback_table_setup:
	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		table = ib_dev->cache.ports[port].gid;

		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
				       table);
		release_gid_table(table);
	}

	return err;
}

static void gid_table_release_one(struct ib_device *ib_dev)
{
	struct ib_gid_table *table;
	u8 port;

	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		table = ib_dev->cache.ports[port].gid;

		release_gid_table(table);
		ib_dev->cache.ports[port].gid = NULL;
	}
}

static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
	struct ib_gid_table *table;
	u8 port;

	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		table = ib_dev->cache.ports[port].gid;
		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
				       table);
	}
}

static int gid_table_setup_one(struct ib_device *ib_dev)
{
	int err;

	err = _gid_table_setup_one(ib_dev);

	if (err)
		return err;

	rdma_roce_rescan_device(ib_dev);

	return err;
}

int ib_get_cached_gid(struct ib_device *device,
		      u8                port_num,
		      int               index,
		      union ib_gid     *gid,
		      struct ib_gid_attr *gid_attr)
{
	int res;
	unsigned long flags;
	struct ib_gid_table *table;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	table = device->cache.ports[port_num - rdma_start_port(device)].gid;
	read_lock_irqsave(&table->rwlock, flags);
	res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
	read_unlock_irqrestore(&table->rwlock, flags);

	return res;
}
EXPORT_SYMBOL(ib_get_cached_gid);

int ib_find_cached_gid(struct ib_device *device,
		       const union ib_gid *gid,
		       enum ib_gid_type gid_type,
		       struct net_device *ndev,
		       u8               *port_num,
		       u16              *index)
{
	return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
}
EXPORT_SYMBOL(ib_find_cached_gid);

int ib_find_gid_by_filter(struct ib_device *device,
			  const union ib_gid *gid,
			  u8 port_num,
			  bool (*filter)(const union ib_gid *gid,
					 const struct ib_gid_attr *,
					 void *),
			  void *context, u16 *index)
{
	/* Only RoCE GID table supports filter function */
	if (!rdma_cap_roce_gid_table(device, port_num) && filter)
		return -EPROTONOSUPPORT;

	return ib_cache_gid_find_by_filter(device, gid,
					   port_num, filter,
					   context, index);
}

int ib_get_cached_pkey(struct ib_device *device,
		       u8                port_num,
		       int               index,
		       u16              *pkey)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int ret = 0;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

	if (index < 0 || index >= cache->table_len)
		ret = -EINVAL;
	else
		*pkey = cache->table[index];

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_pkey);

int ib_get_cached_subnet_prefix(struct ib_device *device,
				u8                port_num,
				u64              *sn_pfx)
{
	unsigned long flags;
	int p;

	if (port_num < rdma_start_port(device) ||
	    port_num > rdma_end_port(device))
		return -EINVAL;

	p = port_num - rdma_start_port(device);
	read_lock_irqsave(&device->cache.lock, flags);
	*sn_pfx = device->cache.ports[p].subnet_prefix;
	read_unlock_irqrestore(&device->cache.lock, flags);

	return 0;
}
EXPORT_SYMBOL(ib_get_cached_subnet_prefix);
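
/*
 * P_Key matching note: bit 15 of a P_Key is the membership bit (set for
 * full members, clear for limited members), so ib_find_cached_pkey()
 * below compares only the low 15 bits and prefers a full-member entry,
 * falling back to a limited-member ("partial") match if that is all the
 * table contains.  ib_find_exact_cached_pkey() compares all 16 bits.
 */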

int ib_find_cached_pkey(struct ib_device *device,
			u8                port_num,
			u16               pkey,
			u16              *index)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int i;
	int ret = -ENOENT;
	int partial_ix = -1;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

	*index = -1;

	for (i = 0; i < cache->table_len; ++i)
		if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
			if (cache->table[i] & 0x8000) {
				*index = i;
				ret = 0;
				break;
			} else
				partial_ix = i;
		}

	if (ret && partial_ix >= 0) {
		*index = partial_ix;
		ret = 0;
	}

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);

int ib_find_exact_cached_pkey(struct ib_device *device,
			      u8                port_num,
			      u16               pkey,
			      u16              *index)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int i;
	int ret = -ENOENT;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

	*index = -1;

	for (i = 0; i < cache->table_len; ++i)
		if (cache->table[i] == pkey) {
			*index = i;
			ret = 0;
			break;
		}

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_find_exact_cached_pkey);

int ib_get_cached_lmc(struct ib_device *device,
		      u8                port_num,
		      u8                *lmc)
{
	unsigned long flags;
	int ret = 0;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);
	*lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);

int ib_get_cached_port_state(struct ib_device   *device,
			     u8                  port_num,
			     enum ib_port_state *port_state)
{
	unsigned long flags;
	int ret = 0;

	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);
	*port_state = device->cache.ports[port_num
		- rdma_start_port(device)].port_state;
	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_port_state);
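
/*
 * ib_cache_update() below refreshes everything cached for one port: it
 * queries the port attributes and the full P_Key table, and, for ports
 * that do not use the RoCE GID table (i.e. plain IB ports), also the
 * GID table, then swaps the new copies in under device->cache.lock.
 * RoCE GID tables are maintained separately through ib_cache_gid_add()
 * and ib_cache_gid_del(), so they are not re-queried here.
 */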

static void ib_cache_update(struct ib_device *device,
			    u8                port,
			    bool	      enforce_security)
{
	struct ib_port_attr       *tprops = NULL;
	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
	struct ib_gid_cache {
		int             table_len;
		union ib_gid    table[0];
	}			  *gid_cache = NULL;
	int                        i;
	int                        ret;
	struct ib_gid_table	  *table;
	bool			   use_roce_gid_table =
					rdma_cap_roce_gid_table(device, port);

	if (!rdma_is_port_valid(device, port))
		return;

	table = device->cache.ports[port - rdma_start_port(device)].gid;

	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
	if (!tprops)
		return;

	ret = ib_query_port(device, port, tprops);
	if (ret) {
		pr_warn("ib_query_port failed (%d) for %s\n",
			ret, device->name);
		goto err;
	}

	pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
			     sizeof *pkey_cache->table, GFP_KERNEL);
	if (!pkey_cache)
		goto err;

	pkey_cache->table_len = tprops->pkey_tbl_len;

	if (!use_roce_gid_table) {
		gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len *
				    sizeof(*gid_cache->table), GFP_KERNEL);
		if (!gid_cache)
			goto err;

		gid_cache->table_len = tprops->gid_tbl_len;
	}

	for (i = 0; i < pkey_cache->table_len; ++i) {
		ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
		if (ret) {
			pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
				ret, device->name, i);
			goto err;
		}
	}

	if (!use_roce_gid_table) {
		for (i = 0; i < gid_cache->table_len; ++i) {
			ret = ib_query_gid(device, port, i,
					   gid_cache->table + i, NULL);
			if (ret) {
				pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
					ret, device->name, i);
				goto err;
			}
		}
	}

	write_lock_irq(&device->cache.lock);

	old_pkey_cache = device->cache.ports[port -
		rdma_start_port(device)].pkey;

	device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
	if (!use_roce_gid_table) {
		write_lock(&table->rwlock);
		for (i = 0; i < gid_cache->table_len; i++) {
			modify_gid(device, port, table, i, gid_cache->table + i,
				   &zattr, false);
		}
		write_unlock(&table->rwlock);
	}

	device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
	device->cache.ports[port - rdma_start_port(device)].port_state =
		tprops->state;

	device->cache.ports[port - rdma_start_port(device)].subnet_prefix =
		tprops->subnet_prefix;
	write_unlock_irq(&device->cache.lock);

	if (enforce_security)
		ib_security_cache_change(device,
					 port,
					 tprops->subnet_prefix);

	kfree(gid_cache);
	kfree(old_pkey_cache);
	kfree(tprops);
	return;

err:
	kfree(pkey_cache);
	kfree(gid_cache);
	kfree(tprops);
}

static void ib_cache_task(struct work_struct *_work)
{
	struct ib_update_work *work =
		container_of(_work, struct ib_update_work, work);

	ib_cache_update(work->device,
			work->port_num,
			work->enforce_security);
	kfree(work);
}

static void ib_cache_event(struct ib_event_handler *handler,
			   struct ib_event *event)
{
	struct ib_update_work *work;

	if (event->event == IB_EVENT_PORT_ERR    ||
	    event->event == IB_EVENT_PORT_ACTIVE ||
	    event->event == IB_EVENT_LID_CHANGE  ||
	    event->event == IB_EVENT_PKEY_CHANGE ||
	    event->event == IB_EVENT_SM_CHANGE   ||
	    event->event == IB_EVENT_CLIENT_REREGISTER ||
	    event->event == IB_EVENT_GID_CHANGE) {
		work = kmalloc(sizeof *work, GFP_ATOMIC);
		if (work) {
			INIT_WORK(&work->work, ib_cache_task);
			work->device   = event->device;
			work->port_num = event->element.port_num;
			if (event->event == IB_EVENT_PKEY_CHANGE ||
			    event->event == IB_EVENT_GID_CHANGE)
				work->enforce_security = true;
			else
				work->enforce_security = false;

			queue_work(ib_wq, &work->work);
		}
	}
}

int ib_cache_setup_one(struct ib_device *device)
{
	int p;
	int err;

	rwlock_init(&device->cache.lock);

	device->cache.ports =
		kzalloc(sizeof(*device->cache.ports) *
			(rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL);
	if (!device->cache.ports)
		return -ENOMEM;

	err = gid_table_setup_one(device);
	if (err) {
		kfree(device->cache.ports);
		device->cache.ports = NULL;
		return err;
	}

	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
		ib_cache_update(device, p + rdma_start_port(device), true);

	INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
			      device, ib_cache_event);
	ib_register_event_handler(&device->cache.event_handler);
	return 0;
}

void ib_cache_release_one(struct ib_device *device)
{
	int p;

	/*
	 * The release function frees all the cache elements.
	 * This function should be called as part of freeing
	 * all the device's resources when the cache could no
	 * longer be accessed.
	 */
	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
		kfree(device->cache.ports[p].pkey);

	gid_table_release_one(device);
	kfree(device->cache.ports);
}

void ib_cache_cleanup_one(struct ib_device *device)
{
	/* The cleanup function unregisters the event handler,
	 * waits for all in-progress workqueue elements and cleans
	 * up the GID cache. This function should be called after
	 * the device was removed from the devices list and all
	 * clients were removed, so the cache exists but is
	 * non-functional and shouldn't be updated anymore.
	 */
	ib_unregister_event_handler(&device->cache.event_handler);
	flush_workqueue(ib_wq);
	gid_table_cleanup_one(device);
}

void __init ib_cache_setup(void)
{
	roce_gid_mgmt_init();
}

void __exit ib_cache_cleanup(void)
{
	roce_gid_mgmt_cleanup();
}