1 /******************************************************************************* 2 3 Intel 10 Gigabit PCI Express Linux driver 4 Copyright(c) 1999 - 2013 Intel Corporation. 5 6 This program is free software; you can redistribute it and/or modify it 7 under the terms and conditions of the GNU General Public License, 8 version 2, as published by the Free Software Foundation. 9 10 This program is distributed in the hope it will be useful, but WITHOUT 11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 more details. 14 15 You should have received a copy of the GNU General Public License along with 16 this program; if not, write to the Free Software Foundation, Inc., 17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 18 19 The full GNU General Public License is included in this distribution in 20 the file called "COPYING". 21 22 Contact Information: 23 Linux NICS <linux.nics@intel.com> 24 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> 25 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 26 27 *******************************************************************************/ 28 29 #include "ixgbe.h" 30 #include "ixgbe_sriov.h" 31 32 #ifdef CONFIG_IXGBE_DCB 33 /** 34 * ixgbe_cache_ring_dcb_sriov - Descriptor ring to register mapping for SR-IOV 35 * @adapter: board private structure to initialize 36 * 37 * Cache the descriptor ring offsets for SR-IOV to the assigned rings. It 38 * will also try to cache the proper offsets if RSS/FCoE are enabled along 39 * with VMDq. 40 * 41 **/ 42 static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter) 43 { 44 #ifdef IXGBE_FCOE 45 struct ixgbe_ring_feature *fcoe = &adapter->ring_feature[RING_F_FCOE]; 46 #endif /* IXGBE_FCOE */ 47 struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ]; 48 int i; 49 u16 reg_idx; 50 u8 tcs = netdev_get_num_tc(adapter->netdev); 51 52 /* verify we have DCB queueing enabled before proceeding */ 53 if (tcs <= 1) 54 return false; 55 56 /* verify we have VMDq enabled before proceeding */ 57 if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) 58 return false; 59 60 /* start at VMDq register offset for SR-IOV enabled setups */ 61 reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask); 62 for (i = 0; i < adapter->num_rx_queues; i++, reg_idx++) { 63 /* If we are greater than indices move to next pool */ 64 if ((reg_idx & ~vmdq->mask) >= tcs) 65 reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask); 66 adapter->rx_ring[i]->reg_idx = reg_idx; 67 } 68 69 reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask); 70 for (i = 0; i < adapter->num_tx_queues; i++, reg_idx++) { 71 /* If we are greater than indices move to next pool */ 72 if ((reg_idx & ~vmdq->mask) >= tcs) 73 reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask); 74 adapter->tx_ring[i]->reg_idx = reg_idx; 75 } 76 77 #ifdef IXGBE_FCOE 78 /* nothing to do if FCoE is disabled */ 79 if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED)) 80 return true; 81 82 /* The work is already done if the FCoE ring is shared */ 83 if (fcoe->offset < tcs) 84 return true; 85 86 /* The FCoE rings exist separately, we need to move their reg_idx */ 87 if (fcoe->indices) { 88 u16 queues_per_pool = __ALIGN_MASK(1, ~vmdq->mask); 89 u8 fcoe_tc = ixgbe_fcoe_get_tc(adapter); 90 91 reg_idx = (vmdq->offset + vmdq->indices) * queues_per_pool; 92 for (i = fcoe->offset; i < adapter->num_rx_queues; i++) { 93 reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask) + fcoe_tc; 94 adapter->rx_ring[i]->reg_idx = reg_idx; 95 reg_idx++; 96 } 97 98 reg_idx = (vmdq->offset + vmdq->indices) * queues_per_pool; 99 for (i = fcoe->offset; i < adapter->num_tx_queues; i++) { 100 reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask) + fcoe_tc; 101 adapter->tx_ring[i]->reg_idx = reg_idx; 102 reg_idx++; 103 } 104 } 105 106 #endif /* IXGBE_FCOE */ 107 return true; 108 } 109 110 /* ixgbe_get_first_reg_idx - Return first register index associated with ring */ 111 static void ixgbe_get_first_reg_idx(struct ixgbe_adapter *adapter, u8 tc, 112 unsigned int *tx, unsigned int *rx) 113 { 114 struct net_device *dev = adapter->netdev; 115 struct ixgbe_hw *hw = &adapter->hw; 116 u8 num_tcs = netdev_get_num_tc(dev); 117 118 *tx = 0; 119 *rx = 0; 120 121 switch (hw->mac.type) { 122 case ixgbe_mac_82598EB: 123 /* TxQs/TC: 4 RxQs/TC: 8 */ 124 *tx = tc << 2; /* 0, 4, 8, 12, 16, 20, 24, 28 */ 125 *rx = tc << 3; /* 0, 8, 16, 24, 32, 40, 48, 56 */ 126 break; 127 case ixgbe_mac_82599EB: 128 case ixgbe_mac_X540: 129 case ixgbe_mac_X550: 130 case ixgbe_mac_X550EM_x: 131 if (num_tcs > 4) { 132 /* 133 * TCs : TC0/1 TC2/3 TC4-7 134 * TxQs/TC: 32 16 8 135 * RxQs/TC: 16 16 16 136 */ 137 *rx = tc << 4; 138 if (tc < 3) 139 *tx = tc << 5; /* 0, 32, 64 */ 140 else if (tc < 5) 141 *tx = (tc + 2) << 4; /* 80, 96 */ 142 else 143 *tx = (tc + 8) << 3; /* 104, 112, 120 */ 144 } else { 145 /* 146 * TCs : TC0 TC1 TC2/3 147 * TxQs/TC: 64 32 16 148 * RxQs/TC: 32 32 32 149 */ 150 *rx = tc << 5; 151 if (tc < 2) 152 *tx = tc << 6; /* 0, 64 */ 153 else 154 *tx = (tc + 4) << 4; /* 96, 112 */ 155 } 156 default: 157 break; 158 } 159 } 160 161 /** 162 * ixgbe_cache_ring_dcb - Descriptor ring to register mapping for DCB 163 * @adapter: board private structure to initialize 164 * 165 * Cache the descriptor ring offsets for DCB to the assigned rings. 166 * 167 **/ 168 static bool ixgbe_cache_ring_dcb(struct ixgbe_adapter *adapter) 169 { 170 struct net_device *dev = adapter->netdev; 171 unsigned int tx_idx, rx_idx; 172 int tc, offset, rss_i, i; 173 u8 num_tcs = netdev_get_num_tc(dev); 174 175 /* verify we have DCB queueing enabled before proceeding */ 176 if (num_tcs <= 1) 177 return false; 178 179 rss_i = adapter->ring_feature[RING_F_RSS].indices; 180 181 for (tc = 0, offset = 0; tc < num_tcs; tc++, offset += rss_i) { 182 ixgbe_get_first_reg_idx(adapter, tc, &tx_idx, &rx_idx); 183 for (i = 0; i < rss_i; i++, tx_idx++, rx_idx++) { 184 adapter->tx_ring[offset + i]->reg_idx = tx_idx; 185 adapter->rx_ring[offset + i]->reg_idx = rx_idx; 186 adapter->tx_ring[offset + i]->dcb_tc = tc; 187 adapter->rx_ring[offset + i]->dcb_tc = tc; 188 } 189 } 190 191 return true; 192 } 193 194 #endif 195 /** 196 * ixgbe_cache_ring_sriov - Descriptor ring to register mapping for sriov 197 * @adapter: board private structure to initialize 198 * 199 * SR-IOV doesn't use any descriptor rings but changes the default if 200 * no other mapping is used. 201 * 202 */ 203 static bool ixgbe_cache_ring_sriov(struct ixgbe_adapter *adapter) 204 { 205 #ifdef IXGBE_FCOE 206 struct ixgbe_ring_feature *fcoe = &adapter->ring_feature[RING_F_FCOE]; 207 #endif /* IXGBE_FCOE */ 208 struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ]; 209 struct ixgbe_ring_feature *rss = &adapter->ring_feature[RING_F_RSS]; 210 int i; 211 u16 reg_idx; 212 213 /* only proceed if VMDq is enabled */ 214 if (!(adapter->flags & IXGBE_FLAG_VMDQ_ENABLED)) 215 return false; 216 217 /* start at VMDq register offset for SR-IOV enabled setups */ 218 reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask); 219 for (i = 0; i < adapter->num_rx_queues; i++, reg_idx++) { 220 #ifdef IXGBE_FCOE 221 /* Allow first FCoE queue to be mapped as RSS */ 222 if (fcoe->offset && (i > fcoe->offset)) 223 break; 224 #endif 225 /* If we are greater than indices move to next pool */ 226 if ((reg_idx & ~vmdq->mask) >= rss->indices) 227 reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask); 228 adapter->rx_ring[i]->reg_idx = reg_idx; 229 } 230 231 #ifdef IXGBE_FCOE 232 /* FCoE uses a linear block of queues so just assigning 1:1 */ 233 for (; i < adapter->num_rx_queues; i++, reg_idx++) 234 adapter->rx_ring[i]->reg_idx = reg_idx; 235 236 #endif 237 reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask); 238 for (i = 0; i < adapter->num_tx_queues; i++, reg_idx++) { 239 #ifdef IXGBE_FCOE 240 /* Allow first FCoE queue to be mapped as RSS */ 241 if (fcoe->offset && (i > fcoe->offset)) 242 break; 243 #endif 244 /* If we are greater than indices move to next pool */ 245 if ((reg_idx & rss->mask) >= rss->indices) 246 reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask); 247 adapter->tx_ring[i]->reg_idx = reg_idx; 248 } 249 250 #ifdef IXGBE_FCOE 251 /* FCoE uses a linear block of queues so just assigning 1:1 */ 252 for (; i < adapter->num_tx_queues; i++, reg_idx++) 253 adapter->tx_ring[i]->reg_idx = reg_idx; 254 255 #endif 256 257 return true; 258 } 259 260 /** 261 * ixgbe_cache_ring_rss - Descriptor ring to register mapping for RSS 262 * @adapter: board private structure to initialize 263 * 264 * Cache the descriptor ring offsets for RSS to the assigned rings. 265 * 266 **/ 267 static bool ixgbe_cache_ring_rss(struct ixgbe_adapter *adapter) 268 { 269 int i; 270 271 for (i = 0; i < adapter->num_rx_queues; i++) 272 adapter->rx_ring[i]->reg_idx = i; 273 for (i = 0; i < adapter->num_tx_queues; i++) 274 adapter->tx_ring[i]->reg_idx = i; 275 276 return true; 277 } 278 279 /** 280 * ixgbe_cache_ring_register - Descriptor ring to register mapping 281 * @adapter: board private structure to initialize 282 * 283 * Once we know the feature-set enabled for the device, we'll cache 284 * the register offset the descriptor ring is assigned to. 285 * 286 * Note, the order the various feature calls is important. It must start with 287 * the "most" features enabled at the same time, then trickle down to the 288 * least amount of features turned on at once. 289 **/ 290 static void ixgbe_cache_ring_register(struct ixgbe_adapter *adapter) 291 { 292 /* start with default case */ 293 adapter->rx_ring[0]->reg_idx = 0; 294 adapter->tx_ring[0]->reg_idx = 0; 295 296 #ifdef CONFIG_IXGBE_DCB 297 if (ixgbe_cache_ring_dcb_sriov(adapter)) 298 return; 299 300 if (ixgbe_cache_ring_dcb(adapter)) 301 return; 302 303 #endif 304 if (ixgbe_cache_ring_sriov(adapter)) 305 return; 306 307 ixgbe_cache_ring_rss(adapter); 308 } 309 310 #define IXGBE_RSS_16Q_MASK 0xF 311 #define IXGBE_RSS_8Q_MASK 0x7 312 #define IXGBE_RSS_4Q_MASK 0x3 313 #define IXGBE_RSS_2Q_MASK 0x1 314 #define IXGBE_RSS_DISABLED_MASK 0x0 315 316 #ifdef CONFIG_IXGBE_DCB 317 /** 318 * ixgbe_set_dcb_sriov_queues: Allocate queues for SR-IOV devices w/ DCB 319 * @adapter: board private structure to initialize 320 * 321 * When SR-IOV (Single Root IO Virtualiztion) is enabled, allocate queues 322 * and VM pools where appropriate. Also assign queues based on DCB 323 * priorities and map accordingly.. 324 * 325 **/ 326 static bool ixgbe_set_dcb_sriov_queues(struct ixgbe_adapter *adapter) 327 { 328 int i; 329 u16 vmdq_i = adapter->ring_feature[RING_F_VMDQ].limit; 330 u16 vmdq_m = 0; 331 #ifdef IXGBE_FCOE 332 u16 fcoe_i = 0; 333 #endif 334 u8 tcs = netdev_get_num_tc(adapter->netdev); 335 336 /* verify we have DCB queueing enabled before proceeding */ 337 if (tcs <= 1) 338 return false; 339 340 /* verify we have VMDq enabled before proceeding */ 341 if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) 342 return false; 343 344 /* Add starting offset to total pool count */ 345 vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset; 346 347 /* 16 pools w/ 8 TC per pool */ 348 if (tcs > 4) { 349 vmdq_i = min_t(u16, vmdq_i, 16); 350 vmdq_m = IXGBE_82599_VMDQ_8Q_MASK; 351 /* 32 pools w/ 4 TC per pool */ 352 } else { 353 vmdq_i = min_t(u16, vmdq_i, 32); 354 vmdq_m = IXGBE_82599_VMDQ_4Q_MASK; 355 } 356 357 #ifdef IXGBE_FCOE 358 /* queues in the remaining pools are available for FCoE */ 359 fcoe_i = (128 / __ALIGN_MASK(1, ~vmdq_m)) - vmdq_i; 360 361 #endif 362 /* remove the starting offset from the pool count */ 363 vmdq_i -= adapter->ring_feature[RING_F_VMDQ].offset; 364 365 /* save features for later use */ 366 adapter->ring_feature[RING_F_VMDQ].indices = vmdq_i; 367 adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m; 368 369 /* 370 * We do not support DCB, VMDq, and RSS all simultaneously 371 * so we will disable RSS since it is the lowest priority 372 */ 373 adapter->ring_feature[RING_F_RSS].indices = 1; 374 adapter->ring_feature[RING_F_RSS].mask = IXGBE_RSS_DISABLED_MASK; 375 376 /* disable ATR as it is not supported when VMDq is enabled */ 377 adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; 378 379 adapter->num_rx_pools = vmdq_i; 380 adapter->num_rx_queues_per_pool = tcs; 381 382 adapter->num_tx_queues = vmdq_i * tcs; 383 adapter->num_rx_queues = vmdq_i * tcs; 384 385 #ifdef IXGBE_FCOE 386 if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) { 387 struct ixgbe_ring_feature *fcoe; 388 389 fcoe = &adapter->ring_feature[RING_F_FCOE]; 390 391 /* limit ourselves based on feature limits */ 392 fcoe_i = min_t(u16, fcoe_i, fcoe->limit); 393 394 if (fcoe_i) { 395 /* alloc queues for FCoE separately */ 396 fcoe->indices = fcoe_i; 397 fcoe->offset = vmdq_i * tcs; 398 399 /* add queues to adapter */ 400 adapter->num_tx_queues += fcoe_i; 401 adapter->num_rx_queues += fcoe_i; 402 } else if (tcs > 1) { 403 /* use queue belonging to FcoE TC */ 404 fcoe->indices = 1; 405 fcoe->offset = ixgbe_fcoe_get_tc(adapter); 406 } else { 407 adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED; 408 409 fcoe->indices = 0; 410 fcoe->offset = 0; 411 } 412 } 413 414 #endif /* IXGBE_FCOE */ 415 /* configure TC to queue mapping */ 416 for (i = 0; i < tcs; i++) 417 netdev_set_tc_queue(adapter->netdev, i, 1, i); 418 419 return true; 420 } 421 422 static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter) 423 { 424 struct net_device *dev = adapter->netdev; 425 struct ixgbe_ring_feature *f; 426 int rss_i, rss_m, i; 427 int tcs; 428 429 /* Map queue offset and counts onto allocated tx queues */ 430 tcs = netdev_get_num_tc(dev); 431 432 /* verify we have DCB queueing enabled before proceeding */ 433 if (tcs <= 1) 434 return false; 435 436 /* determine the upper limit for our current DCB mode */ 437 rss_i = dev->num_tx_queues / tcs; 438 if (adapter->hw.mac.type == ixgbe_mac_82598EB) { 439 /* 8 TC w/ 4 queues per TC */ 440 rss_i = min_t(u16, rss_i, 4); 441 rss_m = IXGBE_RSS_4Q_MASK; 442 } else if (tcs > 4) { 443 /* 8 TC w/ 8 queues per TC */ 444 rss_i = min_t(u16, rss_i, 8); 445 rss_m = IXGBE_RSS_8Q_MASK; 446 } else { 447 /* 4 TC w/ 16 queues per TC */ 448 rss_i = min_t(u16, rss_i, 16); 449 rss_m = IXGBE_RSS_16Q_MASK; 450 } 451 452 /* set RSS mask and indices */ 453 f = &adapter->ring_feature[RING_F_RSS]; 454 rss_i = min_t(int, rss_i, f->limit); 455 f->indices = rss_i; 456 f->mask = rss_m; 457 458 /* disable ATR as it is not supported when multiple TCs are enabled */ 459 adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; 460 461 #ifdef IXGBE_FCOE 462 /* FCoE enabled queues require special configuration indexed 463 * by feature specific indices and offset. Here we map FCoE 464 * indices onto the DCB queue pairs allowing FCoE to own 465 * configuration later. 466 */ 467 if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) { 468 u8 tc = ixgbe_fcoe_get_tc(adapter); 469 470 f = &adapter->ring_feature[RING_F_FCOE]; 471 f->indices = min_t(u16, rss_i, f->limit); 472 f->offset = rss_i * tc; 473 } 474 475 #endif /* IXGBE_FCOE */ 476 for (i = 0; i < tcs; i++) 477 netdev_set_tc_queue(dev, i, rss_i, rss_i * i); 478 479 adapter->num_tx_queues = rss_i * tcs; 480 adapter->num_rx_queues = rss_i * tcs; 481 482 return true; 483 } 484 485 #endif 486 /** 487 * ixgbe_set_sriov_queues - Allocate queues for SR-IOV devices 488 * @adapter: board private structure to initialize 489 * 490 * When SR-IOV (Single Root IO Virtualiztion) is enabled, allocate queues 491 * and VM pools where appropriate. If RSS is available, then also try and 492 * enable RSS and map accordingly. 493 * 494 **/ 495 static bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter) 496 { 497 u16 vmdq_i = adapter->ring_feature[RING_F_VMDQ].limit; 498 u16 vmdq_m = 0; 499 u16 rss_i = adapter->ring_feature[RING_F_RSS].limit; 500 u16 rss_m = IXGBE_RSS_DISABLED_MASK; 501 #ifdef IXGBE_FCOE 502 u16 fcoe_i = 0; 503 #endif 504 bool pools = (find_first_zero_bit(&adapter->fwd_bitmask, 32) > 1); 505 506 /* only proceed if SR-IOV is enabled */ 507 if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) 508 return false; 509 510 /* Add starting offset to total pool count */ 511 vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset; 512 513 /* double check we are limited to maximum pools */ 514 vmdq_i = min_t(u16, IXGBE_MAX_VMDQ_INDICES, vmdq_i); 515 516 /* 64 pool mode with 2 queues per pool */ 517 if ((vmdq_i > 32) || (rss_i < 4) || (vmdq_i > 16 && pools)) { 518 vmdq_m = IXGBE_82599_VMDQ_2Q_MASK; 519 rss_m = IXGBE_RSS_2Q_MASK; 520 rss_i = min_t(u16, rss_i, 2); 521 /* 32 pool mode with 4 queues per pool */ 522 } else { 523 vmdq_m = IXGBE_82599_VMDQ_4Q_MASK; 524 rss_m = IXGBE_RSS_4Q_MASK; 525 rss_i = 4; 526 } 527 528 #ifdef IXGBE_FCOE 529 /* queues in the remaining pools are available for FCoE */ 530 fcoe_i = 128 - (vmdq_i * __ALIGN_MASK(1, ~vmdq_m)); 531 532 #endif 533 /* remove the starting offset from the pool count */ 534 vmdq_i -= adapter->ring_feature[RING_F_VMDQ].offset; 535 536 /* save features for later use */ 537 adapter->ring_feature[RING_F_VMDQ].indices = vmdq_i; 538 adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m; 539 540 /* limit RSS based on user input and save for later use */ 541 adapter->ring_feature[RING_F_RSS].indices = rss_i; 542 adapter->ring_feature[RING_F_RSS].mask = rss_m; 543 544 adapter->num_rx_pools = vmdq_i; 545 adapter->num_rx_queues_per_pool = rss_i; 546 547 adapter->num_rx_queues = vmdq_i * rss_i; 548 adapter->num_tx_queues = vmdq_i * rss_i; 549 550 /* disable ATR as it is not supported when VMDq is enabled */ 551 adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; 552 553 #ifdef IXGBE_FCOE 554 /* 555 * FCoE can use rings from adjacent buffers to allow RSS 556 * like behavior. To account for this we need to add the 557 * FCoE indices to the total ring count. 558 */ 559 if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) { 560 struct ixgbe_ring_feature *fcoe; 561 562 fcoe = &adapter->ring_feature[RING_F_FCOE]; 563 564 /* limit ourselves based on feature limits */ 565 fcoe_i = min_t(u16, fcoe_i, fcoe->limit); 566 567 if (vmdq_i > 1 && fcoe_i) { 568 /* alloc queues for FCoE separately */ 569 fcoe->indices = fcoe_i; 570 fcoe->offset = vmdq_i * rss_i; 571 } else { 572 /* merge FCoE queues with RSS queues */ 573 fcoe_i = min_t(u16, fcoe_i + rss_i, num_online_cpus()); 574 575 /* limit indices to rss_i if MSI-X is disabled */ 576 if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) 577 fcoe_i = rss_i; 578 579 /* attempt to reserve some queues for just FCoE */ 580 fcoe->indices = min_t(u16, fcoe_i, fcoe->limit); 581 fcoe->offset = fcoe_i - fcoe->indices; 582 583 fcoe_i -= rss_i; 584 } 585 586 /* add queues to adapter */ 587 adapter->num_tx_queues += fcoe_i; 588 adapter->num_rx_queues += fcoe_i; 589 } 590 591 #endif 592 return true; 593 } 594 595 /** 596 * ixgbe_set_rss_queues - Allocate queues for RSS 597 * @adapter: board private structure to initialize 598 * 599 * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try 600 * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU. 601 * 602 **/ 603 static bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter) 604 { 605 struct ixgbe_ring_feature *f; 606 u16 rss_i; 607 608 /* set mask for 16 queue limit of RSS */ 609 f = &adapter->ring_feature[RING_F_RSS]; 610 rss_i = f->limit; 611 612 f->indices = rss_i; 613 f->mask = IXGBE_RSS_16Q_MASK; 614 615 /* disable ATR by default, it will be configured below */ 616 adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; 617 618 /* 619 * Use Flow Director in addition to RSS to ensure the best 620 * distribution of flows across cores, even when an FDIR flow 621 * isn't matched. 622 */ 623 if (rss_i > 1 && adapter->atr_sample_rate) { 624 f = &adapter->ring_feature[RING_F_FDIR]; 625 626 rss_i = f->indices = f->limit; 627 628 if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) 629 adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; 630 } 631 632 #ifdef IXGBE_FCOE 633 /* 634 * FCoE can exist on the same rings as standard network traffic 635 * however it is preferred to avoid that if possible. In order 636 * to get the best performance we allocate as many FCoE queues 637 * as we can and we place them at the end of the ring array to 638 * avoid sharing queues with standard RSS on systems with 24 or 639 * more CPUs. 640 */ 641 if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) { 642 struct net_device *dev = adapter->netdev; 643 u16 fcoe_i; 644 645 f = &adapter->ring_feature[RING_F_FCOE]; 646 647 /* merge FCoE queues with RSS queues */ 648 fcoe_i = min_t(u16, f->limit + rss_i, num_online_cpus()); 649 fcoe_i = min_t(u16, fcoe_i, dev->num_tx_queues); 650 651 /* limit indices to rss_i if MSI-X is disabled */ 652 if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) 653 fcoe_i = rss_i; 654 655 /* attempt to reserve some queues for just FCoE */ 656 f->indices = min_t(u16, fcoe_i, f->limit); 657 f->offset = fcoe_i - f->indices; 658 rss_i = max_t(u16, fcoe_i, rss_i); 659 } 660 661 #endif /* IXGBE_FCOE */ 662 adapter->num_rx_queues = rss_i; 663 adapter->num_tx_queues = rss_i; 664 665 return true; 666 } 667 668 /** 669 * ixgbe_set_num_queues - Allocate queues for device, feature dependent 670 * @adapter: board private structure to initialize 671 * 672 * This is the top level queue allocation routine. The order here is very 673 * important, starting with the "most" number of features turned on at once, 674 * and ending with the smallest set of features. This way large combinations 675 * can be allocated if they're turned on, and smaller combinations are the 676 * fallthrough conditions. 677 * 678 **/ 679 static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter) 680 { 681 /* Start with base case */ 682 adapter->num_rx_queues = 1; 683 adapter->num_tx_queues = 1; 684 adapter->num_rx_pools = adapter->num_rx_queues; 685 adapter->num_rx_queues_per_pool = 1; 686 687 #ifdef CONFIG_IXGBE_DCB 688 if (ixgbe_set_dcb_sriov_queues(adapter)) 689 return; 690 691 if (ixgbe_set_dcb_queues(adapter)) 692 return; 693 694 #endif 695 if (ixgbe_set_sriov_queues(adapter)) 696 return; 697 698 ixgbe_set_rss_queues(adapter); 699 } 700 701 /** 702 * ixgbe_acquire_msix_vectors - acquire MSI-X vectors 703 * @adapter: board private structure 704 * 705 * Attempts to acquire a suitable range of MSI-X vector interrupts. Will 706 * return a negative error code if unable to acquire MSI-X vectors for any 707 * reason. 708 */ 709 static int ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter) 710 { 711 struct ixgbe_hw *hw = &adapter->hw; 712 int i, vectors, vector_threshold; 713 714 /* We start by asking for one vector per queue pair */ 715 vectors = max(adapter->num_rx_queues, adapter->num_tx_queues); 716 717 /* It is easy to be greedy for MSI-X vectors. However, it really 718 * doesn't do much good if we have a lot more vectors than CPUs. We'll 719 * be somewhat conservative and only ask for (roughly) the same number 720 * of vectors as there are CPUs. 721 */ 722 vectors = min_t(int, vectors, num_online_cpus()); 723 724 /* Some vectors are necessary for non-queue interrupts */ 725 vectors += NON_Q_VECTORS; 726 727 /* Hardware can only support a maximum of hw.mac->max_msix_vectors. 728 * With features such as RSS and VMDq, we can easily surpass the 729 * number of Rx and Tx descriptor queues supported by our device. 730 * Thus, we cap the maximum in the rare cases where the CPU count also 731 * exceeds our vector limit 732 */ 733 vectors = min_t(int, vectors, hw->mac.max_msix_vectors); 734 735 /* We want a minimum of two MSI-X vectors for (1) a TxQ[0] + RxQ[0] 736 * handler, and (2) an Other (Link Status Change, etc.) handler. 737 */ 738 vector_threshold = MIN_MSIX_COUNT; 739 740 adapter->msix_entries = kcalloc(vectors, 741 sizeof(struct msix_entry), 742 GFP_KERNEL); 743 if (!adapter->msix_entries) 744 return -ENOMEM; 745 746 for (i = 0; i < vectors; i++) 747 adapter->msix_entries[i].entry = i; 748 749 vectors = pci_enable_msix_range(adapter->pdev, adapter->msix_entries, 750 vector_threshold, vectors); 751 752 if (vectors < 0) { 753 /* A negative count of allocated vectors indicates an error in 754 * acquiring within the specified range of MSI-X vectors 755 */ 756 e_dev_warn("Failed to allocate MSI-X interrupts. Err: %d\n", 757 vectors); 758 759 adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED; 760 kfree(adapter->msix_entries); 761 adapter->msix_entries = NULL; 762 763 return vectors; 764 } 765 766 /* we successfully allocated some number of vectors within our 767 * requested range. 768 */ 769 adapter->flags |= IXGBE_FLAG_MSIX_ENABLED; 770 771 /* Adjust for only the vectors we'll use, which is minimum 772 * of max_q_vectors, or the number of vectors we were allocated. 773 */ 774 vectors -= NON_Q_VECTORS; 775 adapter->num_q_vectors = min_t(int, vectors, adapter->max_q_vectors); 776 777 return 0; 778 } 779 780 static void ixgbe_add_ring(struct ixgbe_ring *ring, 781 struct ixgbe_ring_container *head) 782 { 783 ring->next = head->ring; 784 head->ring = ring; 785 head->count++; 786 } 787 788 /** 789 * ixgbe_alloc_q_vector - Allocate memory for a single interrupt vector 790 * @adapter: board private structure to initialize 791 * @v_count: q_vectors allocated on adapter, used for ring interleaving 792 * @v_idx: index of vector in adapter struct 793 * @txr_count: total number of Tx rings to allocate 794 * @txr_idx: index of first Tx ring to allocate 795 * @rxr_count: total number of Rx rings to allocate 796 * @rxr_idx: index of first Rx ring to allocate 797 * 798 * We allocate one q_vector. If allocation fails we return -ENOMEM. 799 **/ 800 static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, 801 int v_count, int v_idx, 802 int txr_count, int txr_idx, 803 int rxr_count, int rxr_idx) 804 { 805 struct ixgbe_q_vector *q_vector; 806 struct ixgbe_ring *ring; 807 int node = NUMA_NO_NODE; 808 int cpu = -1; 809 int ring_count, size; 810 u8 tcs = netdev_get_num_tc(adapter->netdev); 811 812 ring_count = txr_count + rxr_count; 813 size = sizeof(struct ixgbe_q_vector) + 814 (sizeof(struct ixgbe_ring) * ring_count); 815 816 /* customize cpu for Flow Director mapping */ 817 if ((tcs <= 1) && !(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) { 818 u16 rss_i = adapter->ring_feature[RING_F_RSS].indices; 819 if (rss_i > 1 && adapter->atr_sample_rate) { 820 if (cpu_online(v_idx)) { 821 cpu = v_idx; 822 node = cpu_to_node(cpu); 823 } 824 } 825 } 826 827 /* allocate q_vector and rings */ 828 q_vector = kzalloc_node(size, GFP_KERNEL, node); 829 if (!q_vector) 830 q_vector = kzalloc(size, GFP_KERNEL); 831 if (!q_vector) 832 return -ENOMEM; 833 834 /* setup affinity mask and node */ 835 if (cpu != -1) 836 cpumask_set_cpu(cpu, &q_vector->affinity_mask); 837 q_vector->numa_node = node; 838 839 #ifdef CONFIG_IXGBE_DCA 840 /* initialize CPU for DCA */ 841 q_vector->cpu = -1; 842 843 #endif 844 /* initialize NAPI */ 845 netif_napi_add(adapter->netdev, &q_vector->napi, 846 ixgbe_poll, 64); 847 848 #ifdef CONFIG_NET_RX_BUSY_POLL 849 /* initialize busy poll */ 850 atomic_set(&q_vector->state, IXGBE_QV_STATE_DISABLE); 851 852 #endif 853 /* tie q_vector and adapter together */ 854 adapter->q_vector[v_idx] = q_vector; 855 q_vector->adapter = adapter; 856 q_vector->v_idx = v_idx; 857 858 /* initialize work limits */ 859 q_vector->tx.work_limit = adapter->tx_work_limit; 860 861 /* initialize pointer to rings */ 862 ring = q_vector->ring; 863 864 /* intialize ITR */ 865 if (txr_count && !rxr_count) { 866 /* tx only vector */ 867 if (adapter->tx_itr_setting == 1) 868 q_vector->itr = IXGBE_12K_ITR; 869 else 870 q_vector->itr = adapter->tx_itr_setting; 871 } else { 872 /* rx or rx/tx vector */ 873 if (adapter->rx_itr_setting == 1) 874 q_vector->itr = IXGBE_20K_ITR; 875 else 876 q_vector->itr = adapter->rx_itr_setting; 877 } 878 879 while (txr_count) { 880 /* assign generic ring traits */ 881 ring->dev = &adapter->pdev->dev; 882 ring->netdev = adapter->netdev; 883 884 /* configure backlink on ring */ 885 ring->q_vector = q_vector; 886 887 /* update q_vector Tx values */ 888 ixgbe_add_ring(ring, &q_vector->tx); 889 890 /* apply Tx specific ring traits */ 891 ring->count = adapter->tx_ring_count; 892 if (adapter->num_rx_pools > 1) 893 ring->queue_index = 894 txr_idx % adapter->num_rx_queues_per_pool; 895 else 896 ring->queue_index = txr_idx; 897 898 /* assign ring to adapter */ 899 adapter->tx_ring[txr_idx] = ring; 900 901 /* update count and index */ 902 txr_count--; 903 txr_idx += v_count; 904 905 /* push pointer to next ring */ 906 ring++; 907 } 908 909 while (rxr_count) { 910 /* assign generic ring traits */ 911 ring->dev = &adapter->pdev->dev; 912 ring->netdev = adapter->netdev; 913 914 /* configure backlink on ring */ 915 ring->q_vector = q_vector; 916 917 /* update q_vector Rx values */ 918 ixgbe_add_ring(ring, &q_vector->rx); 919 920 /* 921 * 82599 errata, UDP frames with a 0 checksum 922 * can be marked as checksum errors. 923 */ 924 if (adapter->hw.mac.type == ixgbe_mac_82599EB) 925 set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state); 926 927 #ifdef IXGBE_FCOE 928 if (adapter->netdev->features & NETIF_F_FCOE_MTU) { 929 struct ixgbe_ring_feature *f; 930 f = &adapter->ring_feature[RING_F_FCOE]; 931 if ((rxr_idx >= f->offset) && 932 (rxr_idx < f->offset + f->indices)) 933 set_bit(__IXGBE_RX_FCOE, &ring->state); 934 } 935 936 #endif /* IXGBE_FCOE */ 937 /* apply Rx specific ring traits */ 938 ring->count = adapter->rx_ring_count; 939 if (adapter->num_rx_pools > 1) 940 ring->queue_index = 941 rxr_idx % adapter->num_rx_queues_per_pool; 942 else 943 ring->queue_index = rxr_idx; 944 945 /* assign ring to adapter */ 946 adapter->rx_ring[rxr_idx] = ring; 947 948 /* update count and index */ 949 rxr_count--; 950 rxr_idx += v_count; 951 952 /* push pointer to next ring */ 953 ring++; 954 } 955 956 return 0; 957 } 958 959 /** 960 * ixgbe_free_q_vector - Free memory allocated for specific interrupt vector 961 * @adapter: board private structure to initialize 962 * @v_idx: Index of vector to be freed 963 * 964 * This function frees the memory allocated to the q_vector. In addition if 965 * NAPI is enabled it will delete any references to the NAPI struct prior 966 * to freeing the q_vector. 967 **/ 968 static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx) 969 { 970 struct ixgbe_q_vector *q_vector = adapter->q_vector[v_idx]; 971 struct ixgbe_ring *ring; 972 973 ixgbe_for_each_ring(ring, q_vector->tx) 974 adapter->tx_ring[ring->queue_index] = NULL; 975 976 ixgbe_for_each_ring(ring, q_vector->rx) 977 adapter->rx_ring[ring->queue_index] = NULL; 978 979 adapter->q_vector[v_idx] = NULL; 980 napi_hash_del(&q_vector->napi); 981 netif_napi_del(&q_vector->napi); 982 983 /* 984 * ixgbe_get_stats64() might access the rings on this vector, 985 * we must wait a grace period before freeing it. 986 */ 987 kfree_rcu(q_vector, rcu); 988 } 989 990 /** 991 * ixgbe_alloc_q_vectors - Allocate memory for interrupt vectors 992 * @adapter: board private structure to initialize 993 * 994 * We allocate one q_vector per queue interrupt. If allocation fails we 995 * return -ENOMEM. 996 **/ 997 static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter) 998 { 999 int q_vectors = adapter->num_q_vectors; 1000 int rxr_remaining = adapter->num_rx_queues; 1001 int txr_remaining = adapter->num_tx_queues; 1002 int rxr_idx = 0, txr_idx = 0, v_idx = 0; 1003 int err; 1004 1005 /* only one q_vector if MSI-X is disabled. */ 1006 if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) 1007 q_vectors = 1; 1008 1009 if (q_vectors >= (rxr_remaining + txr_remaining)) { 1010 for (; rxr_remaining; v_idx++) { 1011 err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx, 1012 0, 0, 1, rxr_idx); 1013 1014 if (err) 1015 goto err_out; 1016 1017 /* update counts and index */ 1018 rxr_remaining--; 1019 rxr_idx++; 1020 } 1021 } 1022 1023 for (; v_idx < q_vectors; v_idx++) { 1024 int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); 1025 int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); 1026 err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx, 1027 tqpv, txr_idx, 1028 rqpv, rxr_idx); 1029 1030 if (err) 1031 goto err_out; 1032 1033 /* update counts and index */ 1034 rxr_remaining -= rqpv; 1035 txr_remaining -= tqpv; 1036 rxr_idx++; 1037 txr_idx++; 1038 } 1039 1040 return 0; 1041 1042 err_out: 1043 adapter->num_tx_queues = 0; 1044 adapter->num_rx_queues = 0; 1045 adapter->num_q_vectors = 0; 1046 1047 while (v_idx--) 1048 ixgbe_free_q_vector(adapter, v_idx); 1049 1050 return -ENOMEM; 1051 } 1052 1053 /** 1054 * ixgbe_free_q_vectors - Free memory allocated for interrupt vectors 1055 * @adapter: board private structure to initialize 1056 * 1057 * This function frees the memory allocated to the q_vectors. In addition if 1058 * NAPI is enabled it will delete any references to the NAPI struct prior 1059 * to freeing the q_vector. 1060 **/ 1061 static void ixgbe_free_q_vectors(struct ixgbe_adapter *adapter) 1062 { 1063 int v_idx = adapter->num_q_vectors; 1064 1065 adapter->num_tx_queues = 0; 1066 adapter->num_rx_queues = 0; 1067 adapter->num_q_vectors = 0; 1068 1069 while (v_idx--) 1070 ixgbe_free_q_vector(adapter, v_idx); 1071 } 1072 1073 static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter) 1074 { 1075 if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { 1076 adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED; 1077 pci_disable_msix(adapter->pdev); 1078 kfree(adapter->msix_entries); 1079 adapter->msix_entries = NULL; 1080 } else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) { 1081 adapter->flags &= ~IXGBE_FLAG_MSI_ENABLED; 1082 pci_disable_msi(adapter->pdev); 1083 } 1084 } 1085 1086 /** 1087 * ixgbe_set_interrupt_capability - set MSI-X or MSI if supported 1088 * @adapter: board private structure to initialize 1089 * 1090 * Attempt to configure the interrupts using the best available 1091 * capabilities of the hardware and the kernel. 1092 **/ 1093 static void ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter) 1094 { 1095 int err; 1096 1097 /* We will try to get MSI-X interrupts first */ 1098 if (!ixgbe_acquire_msix_vectors(adapter)) 1099 return; 1100 1101 /* At this point, we do not have MSI-X capabilities. We need to 1102 * reconfigure or disable various features which require MSI-X 1103 * capability. 1104 */ 1105 1106 /* Disable DCB unless we only have a single traffic class */ 1107 if (netdev_get_num_tc(adapter->netdev) > 1) { 1108 e_dev_warn("Number of DCB TCs exceeds number of available queues. Disabling DCB support.\n"); 1109 netdev_reset_tc(adapter->netdev); 1110 1111 if (adapter->hw.mac.type == ixgbe_mac_82598EB) 1112 adapter->hw.fc.requested_mode = adapter->last_lfc_mode; 1113 1114 adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED; 1115 adapter->temp_dcb_cfg.pfc_mode_enable = false; 1116 adapter->dcb_cfg.pfc_mode_enable = false; 1117 } 1118 1119 adapter->dcb_cfg.num_tcs.pg_tcs = 1; 1120 adapter->dcb_cfg.num_tcs.pfc_tcs = 1; 1121 1122 /* Disable SR-IOV support */ 1123 e_dev_warn("Disabling SR-IOV support\n"); 1124 ixgbe_disable_sriov(adapter); 1125 1126 /* Disable RSS */ 1127 e_dev_warn("Disabling RSS support\n"); 1128 adapter->ring_feature[RING_F_RSS].limit = 1; 1129 1130 /* recalculate number of queues now that many features have been 1131 * changed or disabled. 1132 */ 1133 ixgbe_set_num_queues(adapter); 1134 adapter->num_q_vectors = 1; 1135 1136 err = pci_enable_msi(adapter->pdev); 1137 if (err) 1138 e_dev_warn("Failed to allocate MSI interrupt, falling back to legacy. Error: %d\n", 1139 err); 1140 else 1141 adapter->flags |= IXGBE_FLAG_MSI_ENABLED; 1142 } 1143 1144 /** 1145 * ixgbe_init_interrupt_scheme - Determine proper interrupt scheme 1146 * @adapter: board private structure to initialize 1147 * 1148 * We determine which interrupt scheme to use based on... 1149 * - Kernel support (MSI, MSI-X) 1150 * - which can be user-defined (via MODULE_PARAM) 1151 * - Hardware queue count (num_*_queues) 1152 * - defined by miscellaneous hardware support/features (RSS, etc.) 1153 **/ 1154 int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter) 1155 { 1156 int err; 1157 1158 /* Number of supported queues */ 1159 ixgbe_set_num_queues(adapter); 1160 1161 /* Set interrupt mode */ 1162 ixgbe_set_interrupt_capability(adapter); 1163 1164 err = ixgbe_alloc_q_vectors(adapter); 1165 if (err) { 1166 e_dev_err("Unable to allocate memory for queue vectors\n"); 1167 goto err_alloc_q_vectors; 1168 } 1169 1170 ixgbe_cache_ring_register(adapter); 1171 1172 e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n", 1173 (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled", 1174 adapter->num_rx_queues, adapter->num_tx_queues); 1175 1176 set_bit(__IXGBE_DOWN, &adapter->state); 1177 1178 return 0; 1179 1180 err_alloc_q_vectors: 1181 ixgbe_reset_interrupt_capability(adapter); 1182 return err; 1183 } 1184 1185 /** 1186 * ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings 1187 * @adapter: board private structure to clear interrupt scheme on 1188 * 1189 * We go through and clear interrupt specific resources and reset the structure 1190 * to pre-load conditions 1191 **/ 1192 void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter) 1193 { 1194 adapter->num_tx_queues = 0; 1195 adapter->num_rx_queues = 0; 1196 1197 ixgbe_free_q_vectors(adapter); 1198 ixgbe_reset_interrupt_capability(adapter); 1199 } 1200 1201 void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens, 1202 u32 fcoe_sof_eof, u32 type_tucmd, u32 mss_l4len_idx) 1203 { 1204 struct ixgbe_adv_tx_context_desc *context_desc; 1205 u16 i = tx_ring->next_to_use; 1206 1207 context_desc = IXGBE_TX_CTXTDESC(tx_ring, i); 1208 1209 i++; 1210 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 1211 1212 /* set bits to identify this as an advanced context descriptor */ 1213 type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; 1214 1215 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); 1216 context_desc->seqnum_seed = cpu_to_le32(fcoe_sof_eof); 1217 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); 1218 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); 1219 } 1220 1221