/*******************************************************************************
 *
 * Intel Ethernet Controller XL710 Family Linux Driver
 * Copyright(c) 2013 - 2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Contact Information:
 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
 *
 ******************************************************************************/

#include "i40e.h"

static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
				u32 td_tag)
{
	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
			   ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
			   ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
			   ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
}

#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
/**
 * i40e_program_fdir_filter - Program a Flow Director filter
 * @fdir_data: Packet data that will be filter parameters
 * @pf: The pf pointer
 * @add: True for add/update, False for remove
 **/
int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
			     struct i40e_pf *pf, bool add)
{
	struct i40e_filter_program_desc *fdir_desc;
	struct i40e_tx_buffer *tx_buf;
	struct i40e_tx_desc *tx_desc;
	struct i40e_ring *tx_ring;
	unsigned int fpt, dcc;
	struct i40e_vsi *vsi;
	struct device *dev;
	dma_addr_t dma;
	u32 td_cmd = 0;
	u16 i;

	/* find existing FDIR VSI */
	vsi = NULL;
	for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
			vsi = pf->vsi[i];
	if (!vsi)
		return -ENOENT;

	tx_ring = vsi->tx_rings[0];
	dev = tx_ring->dev;

	dma = dma_map_single(dev, fdir_data->raw_packet,
			     I40E_FDIR_MAX_RAW_PACKET_LOOKUP, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, dma))
		goto dma_fail;

	/* grab the next descriptor */
	i = tx_ring->next_to_use;
	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);

	tx_ring->next_to_use = (i + 1 < tx_ring->count) ? i + 1 : 0;

	fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
	      I40E_TXD_FLTR_QW0_QINDEX_MASK;

	fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
	       I40E_TXD_FLTR_QW0_FLEXOFF_MASK;

	fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
	       I40E_TXD_FLTR_QW0_PCTYPE_MASK;

	/* Use LAN VSI Id if not programmed by user */
	if (fdir_data->dest_vsi == 0)
		fpt |= (pf->vsi[pf->lan_vsi]->id) <<
		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
	else
		fpt |= ((u32)fdir_data->dest_vsi <<
			I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
		       I40E_TXD_FLTR_QW0_DEST_VSI_MASK;

	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);

	dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;

	if (add)
		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
	else
		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;

	dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
	       I40E_TXD_FLTR_QW1_DEST_MASK;

	dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
	       I40E_TXD_FLTR_QW1_FD_STATUS_MASK;

	if (fdir_data->cnt_index != 0) {
		dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
		dcc |= ((u32)fdir_data->cnt_index <<
			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
		       I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
	}

	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
	fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);

	/* Now program a dummy descriptor */
	i = tx_ring->next_to_use;
	tx_desc = I40E_TX_DESC(tx_ring, i);
	tx_buf = &tx_ring->tx_bi[i];

	tx_ring->next_to_use = (i + 1 < tx_ring->count) ? i + 1 : 0;

	/* record length, and DMA address */
	dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_LOOKUP);
	dma_unmap_addr_set(tx_buf, dma, dma);

	tx_desc->buffer_addr = cpu_to_le64(dma);
	td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;

	tx_desc->cmd_type_offset_bsz =
		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_LOOKUP, 0);

	/* set the timestamp */
	tx_buf->time_stamp = jiffies;

	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch. (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64).
	 */
	wmb();

	/* Mark the data descriptor to be watched */
	tx_buf->next_to_watch = tx_desc;

	writel(tx_ring->next_to_use, tx_ring->tail);
	return 0;

dma_fail:
	return -1;
}

/**
 * i40e_fd_handle_status - check the Programming Status for FD
 * @rx_ring: the Rx ring for this descriptor
 * @qw: the descriptor data
 * @prog_id: the id originally used for programming
 *
 * This is used to verify if the FD programming or invalidation
 * requested by SW to the HW is successful or not and take actions accordingly.
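 * At the moment this handler only logs the error code that the hardware
 * reports in the programming status descriptor.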
 **/
static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u32 qw, u8 prog_id)
{
	struct pci_dev *pdev = rx_ring->vsi->back->pdev;
	u32 error;

	error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;

	/* for now just print the Status */
	dev_info(&pdev->dev, "FD programming id %02x, Status %08x\n",
		 prog_id, error);
}

/**
 * i40e_unmap_and_free_tx_resource - Release a Tx buffer
 * @ring: the ring that owns the buffer
 * @tx_buffer: the buffer to free
 **/
static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
					    struct i40e_tx_buffer *tx_buffer)
{
	if (tx_buffer->skb) {
		dev_kfree_skb_any(tx_buffer->skb);
		if (dma_unmap_len(tx_buffer, len))
			dma_unmap_single(ring->dev,
					 dma_unmap_addr(tx_buffer, dma),
					 dma_unmap_len(tx_buffer, len),
					 DMA_TO_DEVICE);
	} else if (dma_unmap_len(tx_buffer, len)) {
		dma_unmap_page(ring->dev,
			       dma_unmap_addr(tx_buffer, dma),
			       dma_unmap_len(tx_buffer, len),
			       DMA_TO_DEVICE);
	}
	tx_buffer->next_to_watch = NULL;
	tx_buffer->skb = NULL;
	dma_unmap_len_set(tx_buffer, len, 0);
	/* tx_buffer must be completely set up in the transmit path */
}

/**
 * i40e_clean_tx_ring - Free all Tx ring buffers
 * @tx_ring: ring to be cleaned
 **/
void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
{
	unsigned long bi_size;
	u16 i;

	/* ring already cleared, nothing to do */
	if (!tx_ring->tx_bi)
		return;

	/* Free all the Tx ring sk_buffs */
	for (i = 0; i < tx_ring->count; i++)
		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);

	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
	memset(tx_ring->tx_bi, 0, bi_size);

	/* Zero out the descriptor ring */
	memset(tx_ring->desc, 0, tx_ring->size);

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;

	if (!tx_ring->netdev)
		return;

	/* cleanup Tx queue statistics */
	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
						  tx_ring->queue_index));
}

/**
 * i40e_free_tx_resources - Free Tx resources per queue
 * @tx_ring: Tx descriptor ring for a specific queue
 *
 * Free all transmit software resources
 **/
void i40e_free_tx_resources(struct i40e_ring *tx_ring)
{
	i40e_clean_tx_ring(tx_ring);
	kfree(tx_ring->tx_bi);
	tx_ring->tx_bi = NULL;

	if (tx_ring->desc) {
		dma_free_coherent(tx_ring->dev, tx_ring->size,
				  tx_ring->desc, tx_ring->dma);
		tx_ring->desc = NULL;
	}
}

/**
 * i40e_get_tx_pending - how many Tx descriptors are not yet processed
 * @ring: the ring of descriptors
 *
 * Since there is no access to the ring head register
 * in XL710, we need to use our local copies
 **/
static u32 i40e_get_tx_pending(struct i40e_ring *ring)
{
	u32 ntu = ((ring->next_to_clean <= ring->next_to_use)
		  ? ring->next_to_use
		  : ring->next_to_use + ring->count);
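	/* e.g. for a 512 descriptor ring with next_to_clean == 500 and
	 * next_to_use == 10 (already wrapped), ntu above becomes 10 + 512,
	 * so (10 + 512) - 500 = 22 descriptors are still pending.
	 */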
	return ntu - ring->next_to_clean;
}

/**
 * i40e_check_tx_hang - Is there a hang in the Tx queue
 * @tx_ring: the ring of descriptors
 **/
static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
{
	u32 tx_pending = i40e_get_tx_pending(tx_ring);
	bool ret = false;

	clear_check_for_tx_hang(tx_ring);

	/* Check for a hung queue, but be thorough. This verifies
	 * that a transmit has been completed since the previous
	 * check AND there is at least one packet pending. The
	 * ARMED bit is set to indicate a potential hang. The
	 * bit is cleared if a pause frame is received to remove
	 * false hang detection due to PFC or 802.3x frames. By
	 * requiring this to fail twice we avoid races with
	 * PFC clearing the ARMED bit and conditions where we
	 * run the check_tx_hang logic with a transmit completion
	 * pending but without time to complete it yet.
	 */
	if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) &&
	    tx_pending) {
		/* make sure it is true for two checks in a row */
		ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
				       &tx_ring->state);
	} else {
		/* update completed stats and disarm the hang check */
		tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets;
		clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
	}

	return ret;
}

/**
 * i40e_clean_tx_irq - Reclaim resources after transmit completes
 * @tx_ring: tx ring to clean
 * @budget: how many cleans we're allowed
 *
 * Returns true if there's any budget left (i.e. the clean is finished)
 **/
static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
{
	u16 i = tx_ring->next_to_clean;
	struct i40e_tx_buffer *tx_buf;
	struct i40e_tx_desc *tx_desc;
	unsigned int total_packets = 0;
	unsigned int total_bytes = 0;

	tx_buf = &tx_ring->tx_bi[i];
	tx_desc = I40E_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	do {
		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		read_barrier_depends();

		/* if the descriptor isn't done, no work yet to do */
		if (!(eop_desc->cmd_type_offset_bsz &
		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buf->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buf->bytecount;
		total_packets += tx_buf->gso_segs;

		/* free the skb */
		dev_kfree_skb_any(tx_buf->skb);

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 dma_unmap_addr(tx_buf, dma),
				 dma_unmap_len(tx_buf, len),
				 DMA_TO_DEVICE);

		/* clear tx_buffer data */
		tx_buf->skb = NULL;
		dma_unmap_len_set(tx_buf, len, 0);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {

			tx_buf++;
			tx_desc++;
			i++;
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buf = tx_ring->tx_bi;
				tx_desc = I40E_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buf, len)) {
				dma_unmap_page(tx_ring->dev,
					       dma_unmap_addr(tx_buf, dma),
					       dma_unmap_len(tx_buf, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buf, len, 0);
			}
		}

		/* move us one more past the eop_desc for start of next pkt */
		tx_buf++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buf = tx_ring->tx_bi;
			tx_desc = I40E_TX_DESC(tx_ring, 0);
		}

		/* update budget accounting */
		budget--;
	} while (likely(budget));

	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->syncp);
	tx_ring->q_vector->tx.total_bytes += total_bytes;
	tx_ring->q_vector->tx.total_packets += total_packets;

	if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
		/* schedule immediate reset if we believe we hung */
		dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
			 "  VSI                  <%d>\n"
			 "  Tx Queue             <%d>\n"
			 "  next_to_use          <%x>\n"
			 "  next_to_clean        <%x>\n",
			 tx_ring->vsi->seid,
			 tx_ring->queue_index,
			 tx_ring->next_to_use, i);
		dev_info(tx_ring->dev, "tx_bi[next_to_clean]\n"
			 "  time_stamp           <%lx>\n"
			 "  jiffies              <%lx>\n",
			 tx_ring->tx_bi[i].time_stamp, jiffies);

		netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);

		dev_info(tx_ring->dev,
			 "tx hang detected on queue %d, resetting adapter\n",
			 tx_ring->queue_index);

		tx_ring->netdev->netdev_ops->ndo_tx_timeout(tx_ring->netdev);

		/* the adapter is about to reset, no point in enabling stuff */
		return true;
	}

	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
						      tx_ring->queue_index),
				  total_packets, total_bytes);

#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);
			++tx_ring->tx_stats.restart_queue;
		}
	}

	return budget > 0;
}

/**
 * i40e_set_new_dynamic_itr - Find new ITR level
 * @rc: structure containing ring performance data
 *
 * Stores a new ITR value based on packets and byte counts during
 * the last interrupt. The advantage of per interrupt computation
 * is faster updates and more accurate ITR for the current traffic
 * pattern. Constants in this function were computed based on
 * theoretical maximum wire speed and thresholds were set based on
 * testing data as well as attempting to minimize response time
 * while increasing bulk throughput.
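 * The smoothing step below blends the reciprocals of the old and newly
 * selected ITR with weights of roughly 9:1, so the effective interrupt
 * rate drifts toward the new target instead of jumping to it.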
 **/
static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
{
	enum i40e_latency_range new_latency_range = rc->latency_range;
	u32 new_itr = rc->itr;
	int bytes_per_int;

	if (rc->total_packets == 0 || !rc->itr)
		return;

	/* simple throttlerate management
	 *   0-10MB/s   lowest (100000 ints/s)
	 *  10-20MB/s   low    (20000 ints/s)
	 *  20-1249MB/s bulk   (8000 ints/s)
	 */
	bytes_per_int = rc->total_bytes / rc->itr;
	switch (rc->itr) {
	case I40E_LOWEST_LATENCY:
		if (bytes_per_int > 10)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	case I40E_LOW_LATENCY:
		if (bytes_per_int > 20)
			new_latency_range = I40E_BULK_LATENCY;
		else if (bytes_per_int <= 10)
			new_latency_range = I40E_LOWEST_LATENCY;
		break;
	case I40E_BULK_LATENCY:
		if (bytes_per_int <= 20)
			rc->latency_range = I40E_LOW_LATENCY;
		break;
	}

	switch (new_latency_range) {
	case I40E_LOWEST_LATENCY:
		new_itr = I40E_ITR_100K;
		break;
	case I40E_LOW_LATENCY:
		new_itr = I40E_ITR_20K;
		break;
	case I40E_BULK_LATENCY:
		new_itr = I40E_ITR_8K;
		break;
	default:
		break;
	}

	if (new_itr != rc->itr) {
		/* do an exponential smoothing */
		new_itr = (10 * new_itr * rc->itr) /
			  ((9 * new_itr) + rc->itr);
		rc->itr = new_itr & I40E_MAX_ITR;
	}

	rc->total_bytes = 0;
	rc->total_packets = 0;
}

/**
 * i40e_update_dynamic_itr - Adjust ITR based on bytes per int
 * @q_vector: the vector to adjust
 **/
static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector)
{
	u16 vector = q_vector->vsi->base_vector + q_vector->v_idx;
	struct i40e_hw *hw = &q_vector->vsi->back->hw;
	u32 reg_addr;
	u16 old_itr;

	reg_addr = I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1);
	old_itr = q_vector->rx.itr;
	i40e_set_new_dynamic_itr(&q_vector->rx);
	if (old_itr != q_vector->rx.itr)
		wr32(hw, reg_addr, q_vector->rx.itr);

	reg_addr = I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1);
	old_itr = q_vector->tx.itr;
	i40e_set_new_dynamic_itr(&q_vector->tx);
	if (old_itr != q_vector->tx.itr)
		wr32(hw, reg_addr, q_vector->tx.itr);
}

/**
 * i40e_clean_programming_status - clean the programming status descriptor
 * @rx_ring: the rx ring that has this descriptor
 * @rx_desc: the rx descriptor written back by HW
 *
 * Flow director should handle FD_FILTER_STATUS to check its filter programming
 * status being successful or not and take actions accordingly. FCoE should
 * handle its context/filter programming/invalidation status and take actions.
 *
 **/
static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
					  union i40e_rx_desc *rx_desc)
{
	u64 qw;
	u8 id;

	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
	     I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;

	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
		i40e_fd_handle_status(rx_ring, qw, id);
}

/**
 * i40e_setup_tx_descriptors - Allocate the Tx descriptors
 * @tx_ring: the tx ring to set up
 *
 * Return 0 on success, negative on error
 **/
int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
{
	struct device *dev = tx_ring->dev;
	int bi_size;

	if (!dev)
		return -ENOMEM;

	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
	if (!tx_ring->tx_bi)
		goto err;

	/* round up to nearest 4K */
	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
	tx_ring->size = ALIGN(tx_ring->size, 4096);
	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
					   &tx_ring->dma, GFP_KERNEL);
	if (!tx_ring->desc) {
		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
			 tx_ring->size);
		goto err;
	}

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
	return 0;

err:
	kfree(tx_ring->tx_bi);
	tx_ring->tx_bi = NULL;
	return -ENOMEM;
}

/**
 * i40e_clean_rx_ring - Free Rx buffers
 * @rx_ring: ring to be cleaned
 **/
void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	struct i40e_rx_buffer *rx_bi;
	unsigned long bi_size;
	u16 i;

	/* ring already cleared, nothing to do */
	if (!rx_ring->rx_bi)
		return;

	/* Free all the Rx ring sk_buffs */
	for (i = 0; i < rx_ring->count; i++) {
		rx_bi = &rx_ring->rx_bi[i];
		if (rx_bi->dma) {
			dma_unmap_single(dev,
					 rx_bi->dma,
					 rx_ring->rx_buf_len,
					 DMA_FROM_DEVICE);
			rx_bi->dma = 0;
		}
		if (rx_bi->skb) {
			dev_kfree_skb(rx_bi->skb);
			rx_bi->skb = NULL;
		}
		if (rx_bi->page) {
			if (rx_bi->page_dma) {
				dma_unmap_page(dev,
					       rx_bi->page_dma,
					       PAGE_SIZE / 2,
					       DMA_FROM_DEVICE);
				rx_bi->page_dma = 0;
			}
			__free_page(rx_bi->page);
			rx_bi->page = NULL;
			rx_bi->page_offset = 0;
		}
	}

	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
	memset(rx_ring->rx_bi, 0, bi_size);

	/* Zero out the descriptor ring */
	memset(rx_ring->desc, 0, rx_ring->size);

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;
}

/**
 * i40e_free_rx_resources - Free Rx resources
 * @rx_ring: ring to clean the resources from
 *
 * Free all receive software resources
 **/
void i40e_free_rx_resources(struct i40e_ring *rx_ring)
{
	i40e_clean_rx_ring(rx_ring);
	kfree(rx_ring->rx_bi);
	rx_ring->rx_bi = NULL;

	if (rx_ring->desc) {
		dma_free_coherent(rx_ring->dev, rx_ring->size,
				  rx_ring->desc, rx_ring->dma);
		rx_ring->desc = NULL;
	}
}

/**
 * i40e_setup_rx_descriptors - Allocate Rx descriptors
 * @rx_ring: Rx descriptor ring (for a specific queue) to setup
 *
 * Returns 0 on success, negative on failure
 **/
int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	int bi_size;

	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
	if (!rx_ring->rx_bi)
		goto err;

	/* Round up to nearest 4K */
	rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
		? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
		: rx_ring->count * sizeof(union i40e_32byte_rx_desc);
	rx_ring->size = ALIGN(rx_ring->size, 4096);
	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
					   &rx_ring->dma, GFP_KERNEL);

	if (!rx_ring->desc) {
		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
			 rx_ring->size);
		goto err;
	}

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;

	return 0;
err:
	kfree(rx_ring->rx_bi);
	rx_ring->rx_bi = NULL;
	return -ENOMEM;
}

/**
 * i40e_release_rx_desc - Store the new tail and head values
 * @rx_ring: ring to bump
 * @val: new head index
 **/
static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
{
	rx_ring->next_to_use = val;
	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch. (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64).
	 */
	wmb();
	writel(val, rx_ring->tail);
}

/**
 * i40e_alloc_rx_buffers - Replace used receive buffers; packet split
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 **/
void i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
{
	u16 i = rx_ring->next_to_use;
	union i40e_rx_desc *rx_desc;
	struct i40e_rx_buffer *bi;
	struct sk_buff *skb;

	/* do nothing if no valid netdev defined */
	if (!rx_ring->netdev || !cleaned_count)
		return;

	while (cleaned_count--) {
		rx_desc = I40E_RX_DESC(rx_ring, i);
		bi = &rx_ring->rx_bi[i];
		skb = bi->skb;

		if (!skb) {
			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
							rx_ring->rx_buf_len);
			if (!skb) {
				rx_ring->rx_stats.alloc_buff_failed++;
				goto no_buffers;
			}
			/* initialize queue mapping */
			skb_record_rx_queue(skb, rx_ring->queue_index);
			bi->skb = skb;
		}

		if (!bi->dma) {
			bi->dma = dma_map_single(rx_ring->dev,
						 skb->data,
						 rx_ring->rx_buf_len,
						 DMA_FROM_DEVICE);
			if (dma_mapping_error(rx_ring->dev, bi->dma)) {
				rx_ring->rx_stats.alloc_buff_failed++;
				bi->dma = 0;
				goto no_buffers;
			}
		}

		if (ring_is_ps_enabled(rx_ring)) {
			if (!bi->page) {
				bi->page = alloc_page(GFP_ATOMIC);
				if (!bi->page) {
					rx_ring->rx_stats.alloc_page_failed++;
					goto no_buffers;
				}
			}

			if (!bi->page_dma) {
				/* use a half page if we're re-using */
				bi->page_offset ^= PAGE_SIZE / 2;
				bi->page_dma = dma_map_page(rx_ring->dev,
							    bi->page,
							    bi->page_offset,
							    PAGE_SIZE / 2,
							    DMA_FROM_DEVICE);
				if (dma_mapping_error(rx_ring->dev,
						      bi->page_dma)) {
					rx_ring->rx_stats.alloc_page_failed++;
					bi->page_dma = 0;
					goto no_buffers;
				}
			}

			/* Refresh the desc even if buffer_addrs didn't change
			 * because each write-back erases this info.
			 */
			rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
			rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
		} else {
			rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
			rx_desc->read.hdr_addr = 0;
		}
		i++;
		if (i == rx_ring->count)
			i = 0;
	}

no_buffers:
	if (rx_ring->next_to_use != i)
		i40e_release_rx_desc(rx_ring, i);
}

/**
 * i40e_receive_skb - Send a completed packet up the stack
 * @rx_ring: rx ring in play
 * @skb: packet to send up
 * @vlan_tag: vlan tag for packet
 **/
static void i40e_receive_skb(struct i40e_ring *rx_ring,
			     struct sk_buff *skb, u16 vlan_tag)
{
	struct i40e_q_vector *q_vector = rx_ring->q_vector;
	struct i40e_vsi *vsi = rx_ring->vsi;
	u64 flags = vsi->back->flags;

	if (vlan_tag & VLAN_VID_MASK)
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);

	if (flags & I40E_FLAG_IN_NETPOLL)
		netif_rx(skb);
	else
		napi_gro_receive(&q_vector->napi, skb);
}

/**
 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
 * @vsi: the VSI we care about
 * @skb: skb currently being received and modified
 * @rx_status: status value of last descriptor in packet
 * @rx_error: error value of last descriptor in packet
 * @rx_ptype: ptype value of last descriptor in packet
 **/
static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
				    struct sk_buff *skb,
				    u32 rx_status,
				    u32 rx_error,
				    u16 rx_ptype)
{
	bool ipv4_tunnel, ipv6_tunnel;
	__wsum rx_udp_csum;
	__sum16 csum;
	struct iphdr *iph;

	ipv4_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
		      (rx_ptype < I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
	ipv6_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
		      (rx_ptype < I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);

	skb->encapsulation = ipv4_tunnel || ipv6_tunnel;
	skb->ip_summed = CHECKSUM_NONE;

	/* Rx csum enabled and ip headers found? */
	if (!(vsi->netdev->features & NETIF_F_RXCSUM &&
	      rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
		return;

	/* likely incorrect csum if alternate IP extension headers found */
	if (rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
		return;

	/* IP or L4 or outermost IP checksum error */
	if (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
			(1 << I40E_RX_DESC_ERROR_L4E_SHIFT) |
			(1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))) {
		vsi->back->hw_csum_rx_error++;
		return;
	}

	if (ipv4_tunnel &&
	    !(rx_status & (1 << I40E_RX_DESC_STATUS_UDP_0_SHIFT))) {
		/* If VXLAN traffic has an outer UDPv4 checksum we need to check
		 * it in the driver, hardware does not do it for us.
		 * Since L3L4P bit was set we assume a valid IHL value (>=5)
		 * so the total length of IPv4 header is IHL*4 bytes
		 */
		skb->transport_header = skb->mac_header +
					sizeof(struct ethhdr) +
					(ip_hdr(skb)->ihl * 4);

		/* Add 4 bytes for VLAN tagged packets */
		skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
					  skb->protocol == htons(ETH_P_8021AD))
					  ? VLAN_HLEN : 0;

		rx_udp_csum = udp_csum(skb);
		iph = ip_hdr(skb);
		csum = csum_tcpudp_magic(
				iph->saddr, iph->daddr,
				(skb->len - skb_transport_offset(skb)),
				IPPROTO_UDP, rx_udp_csum);

		if (udp_hdr(skb)->check != csum) {
			vsi->back->hw_csum_rx_error++;
			return;
		}
	}

	skb->ip_summed = CHECKSUM_UNNECESSARY;
}

/**
 * i40e_rx_hash - returns the hash value from the Rx descriptor
 * @ring: descriptor ring
 * @rx_desc: specific descriptor
 **/
static inline u32 i40e_rx_hash(struct i40e_ring *ring,
			       union i40e_rx_desc *rx_desc)
{
	const __le64 rss_mask =
		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);

	if ((ring->netdev->features & NETIF_F_RXHASH) &&
	    (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask)
		return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
	else
		return 0;
}

/**
 * i40e_clean_rx_irq - Reclaim resources after receive completes
 * @rx_ring: rx ring to clean
 * @budget: how many cleans we're allowed
 *
 * Returns true if there's any budget left (i.e. the clean is finished)
 **/
static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
	const int current_node = numa_node_id();
	struct i40e_vsi *vsi = rx_ring->vsi;
	u16 i = rx_ring->next_to_clean;
	union i40e_rx_desc *rx_desc;
	u32 rx_error, rx_status;
	u64 qword;
	u16 rx_ptype;

	rx_desc = I40E_RX_DESC(rx_ring, i);
	qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
		    I40E_RXD_QW1_STATUS_SHIFT;

	while (rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) {
		union i40e_rx_desc *next_rxd;
		struct i40e_rx_buffer *rx_bi;
		struct sk_buff *skb;
		u16 vlan_tag;
		if (i40e_rx_is_programming_status(qword)) {
			i40e_clean_programming_status(rx_ring, rx_desc);
			I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd);
			goto next_desc;
		}
		rx_bi = &rx_ring->rx_bi[i];
		skb = rx_bi->skb;
		prefetch(skb->data);

		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
		rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
				I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
		rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
			 I40E_RXD_QW1_LENGTH_SPH_SHIFT;

		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
			   I40E_RXD_QW1_ERROR_SHIFT;
		rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
		rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);

		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
			   I40E_RXD_QW1_PTYPE_SHIFT;
		rx_bi->skb = NULL;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * STATUS_DD bit is set
		 */
		rmb();

		/* Get the header and possibly the whole packet
		 * If this is an skb from previous receive dma will be 0
		 */
		if (rx_bi->dma) {
			u16 len;

			if (rx_hbo)
				len = I40E_RX_HDR_SIZE;
			else if (rx_sph)
				len = rx_header_len;
			else if (rx_packet_len)
				len = rx_packet_len;	/* 1buf/no split found */
			else
				len = rx_header_len;	/* split always mode */

			skb_put(skb, len);
			dma_unmap_single(rx_ring->dev,
					 rx_bi->dma,
					 rx_ring->rx_buf_len,
					 DMA_FROM_DEVICE);
			rx_bi->dma = 0;
		}

		/* Get the rest of the data if this was a header split */
		if (ring_is_ps_enabled(rx_ring) && rx_packet_len) {

			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
					   rx_bi->page,
					   rx_bi->page_offset,
					   rx_packet_len);

			skb->len += rx_packet_len;
			skb->data_len += rx_packet_len;
			skb->truesize += rx_packet_len;

			if ((page_count(rx_bi->page) == 1) &&
			    (page_to_nid(rx_bi->page) == current_node))
				get_page(rx_bi->page);
			else
				rx_bi->page = NULL;

			dma_unmap_page(rx_ring->dev,
				       rx_bi->page_dma,
				       PAGE_SIZE / 2,
				       DMA_FROM_DEVICE);
			rx_bi->page_dma = 0;
		}
		I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd);

		if (unlikely(
		    !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
			struct i40e_rx_buffer *next_buffer;

			next_buffer = &rx_ring->rx_bi[i];

			if (ring_is_ps_enabled(rx_ring)) {
				rx_bi->skb = next_buffer->skb;
				rx_bi->dma = next_buffer->dma;
				next_buffer->skb = skb;
				next_buffer->dma = 0;
			}
			rx_ring->rx_stats.non_eop_descs++;
			goto next_desc;
		}

		/* ERR_MASK will only have valid bits if EOP set */
		if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			dev_kfree_skb_any(skb);
			goto next_desc;
		}

		skb->rxhash = i40e_rx_hash(rx_ring, rx_desc);
		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
					     I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
					     I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
			rx_ring->last_rx_timestamp = jiffies;
		}

		/* probably a little skewed due to removing CRC */
		total_rx_bytes += skb->len;
		total_rx_packets++;

		skb->protocol = eth_type_trans(skb, rx_ring->netdev);

		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);

		vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
			 : 0;
		i40e_receive_skb(rx_ring, skb, vlan_tag);

		rx_ring->netdev->last_rx = jiffies;
		budget--;
next_desc:
		rx_desc->wb.qword1.status_error_len = 0;
		if (!budget)
			break;

		cleaned_count++;
		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
			i40e_alloc_rx_buffers(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		/* use prefetched values */
		rx_desc = next_rxd;
		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
			    I40E_RXD_QW1_STATUS_SHIFT;
	}

	rx_ring->next_to_clean = i;
	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_rx_packets;
	rx_ring->stats.bytes += total_rx_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	rx_ring->q_vector->rx.total_packets += total_rx_packets;
	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;

	if (cleaned_count)
		i40e_alloc_rx_buffers(rx_ring, cleaned_count);

	return budget > 0;
}

/**
 * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
 * @napi: napi struct with our devices info in it
 * @budget: amount of work driver is allowed to do this pass, in packets
 *
 * This function will clean all queues associated with a q_vector.
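 * Tx rings are cleaned with the VSI work limit; the Rx budget is split
 * evenly across this vector's Rx rings, with a floor of one packet per
 * ring so that polling is not exited prematurely.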
 *
 * Returns the amount of work done
 **/
int i40e_napi_poll(struct napi_struct *napi, int budget)
{
	struct i40e_q_vector *q_vector =
			       container_of(napi, struct i40e_q_vector, napi);
	struct i40e_vsi *vsi = q_vector->vsi;
	struct i40e_ring *ring;
	bool clean_complete = true;
	int budget_per_ring;

	if (test_bit(__I40E_DOWN, &vsi->state)) {
		napi_complete(napi);
		return 0;
	}

	/* Since the actual Tx work is minimal, we can give the Tx a larger
	 * budget and be more aggressive about cleaning up the Tx descriptors.
	 */
	i40e_for_each_ring(ring, q_vector->tx)
		clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);

	/* We attempt to distribute budget to each Rx queue fairly, but don't
	 * allow the budget to go below 1 because that would exit polling early.
	 */
	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);

	i40e_for_each_ring(ring, q_vector->rx)
		clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring);

	/* If work not completed, return budget and polling will return */
	if (!clean_complete)
		return budget;

	/* Work is done so exit the polling mode and re-enable the interrupt */
	napi_complete(napi);
	if (ITR_IS_DYNAMIC(vsi->rx_itr_setting) ||
	    ITR_IS_DYNAMIC(vsi->tx_itr_setting))
		i40e_update_dynamic_itr(q_vector);

	if (!test_bit(__I40E_DOWN, &vsi->state)) {
		if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
			i40e_irq_dynamic_enable(vsi,
					q_vector->v_idx + vsi->base_vector);
		} else {
			struct i40e_hw *hw = &vsi->back->hw;
			/* We re-enable the queue 0 cause, but
			 * don't worry about dynamic_enable
			 * because we left it on for the other
			 * possible interrupts during napi
			 */
			u32 qval = rd32(hw, I40E_QINT_RQCTL(0));
			qval |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
			wr32(hw, I40E_QINT_RQCTL(0), qval);

			qval = rd32(hw, I40E_QINT_TQCTL(0));
			qval |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
			wr32(hw, I40E_QINT_TQCTL(0), qval);

			i40e_irq_dynamic_enable_icr0(vsi->back);
		}
	}

	return 0;
}

/**
 * i40e_atr - Add a Flow Director ATR filter
 * @tx_ring: ring to add programming descriptor to
 * @skb: send buffer
 * @flags: send flags
 * @protocol: wire protocol
 **/
static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
		     u32 flags, __be16 protocol)
{
	struct i40e_filter_program_desc *fdir_desc;
	struct i40e_pf *pf = tx_ring->vsi->back;
	union {
		unsigned char *network;
		struct iphdr *ipv4;
		struct ipv6hdr *ipv6;
	} hdr;
	struct tcphdr *th;
	unsigned int hlen;
	u32 flex_ptype, dtype_cmd;
	u16 i;

	/* make sure ATR is enabled */
	if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
		return;

	/* if sampling is disabled do nothing */
	if (!tx_ring->atr_sample_rate)
		return;

	tx_ring->atr_count++;

	/* snag network header to get L4 type and address */
	hdr.network = skb_network_header(skb);

	/* Currently only IPv4/IPv6 with TCP is supported */
	if (protocol == htons(ETH_P_IP)) {
		if (hdr.ipv4->protocol != IPPROTO_TCP)
			return;

		/* access ihl as a u8 to avoid unaligned access on ia64 */
		hlen = (hdr.network[0] & 0x0F) << 2;
	} else if (protocol == htons(ETH_P_IPV6)) {
		if (hdr.ipv6->nexthdr != IPPROTO_TCP)
			return;

		hlen = sizeof(struct ipv6hdr);
	} else {
		return;
	}

	th = (struct tcphdr *)(hdr.network + hlen);

	/* sample on all syn/fin packets or once every atr sample rate */
	if (!th->fin && !th->syn && (tx_ring->atr_count < tx_ring->atr_sample_rate))
		return;

	tx_ring->atr_count = 0;

	/* grab the next descriptor */
	i = tx_ring->next_to_use;
	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);

	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
		     I40E_TXD_FLTR_QW0_QINDEX_MASK;
	flex_ptype |= (protocol == htons(ETH_P_IP)) ?
		      (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
		      (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);

	flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;

	dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;

	dtype_cmd |= th->fin ?
		     (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
		      I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
		     (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
		      I40E_TXD_FLTR_QW1_PCMD_SHIFT);

	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
		     I40E_TXD_FLTR_QW1_DEST_SHIFT;

	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
		     I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;

	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
}

/**
 * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
 * @skb: send buffer
 * @tx_ring: ring to send buffer on
 * @flags: the tx flags to be set
 *
 * Checks the skb and set up correspondingly several generic transmit flags
 * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
 *
 * Returns an error code to indicate the frame should be dropped upon error,
 * otherwise returns 0 to indicate the flags have been set properly.
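 *
 * On success the 802.1Q tag (if any) is stored in *flags at
 * I40E_TX_FLAGS_VLAN_SHIFT together with an HW/SW VLAN marker bit, and the
 * 802.1p priority may be folded into the tag's PRIO field when DCB is
 * enabled.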
 **/
static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
				      struct i40e_ring *tx_ring,
				      u32 *flags)
{
	__be16 protocol = skb->protocol;
	u32 tx_flags = 0;

	/* if we have a HW VLAN tag being added, default to the HW one */
	if (vlan_tx_tag_present(skb)) {
		tx_flags |= vlan_tx_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
		tx_flags |= I40E_TX_FLAGS_HW_VLAN;
	/* else if it is a SW VLAN, check the next protocol and store the tag */
	} else if (protocol == htons(ETH_P_8021Q)) {
		struct vlan_hdr *vhdr, _vhdr;
		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
		if (!vhdr)
			return -EINVAL;

		protocol = vhdr->h_vlan_encapsulated_proto;
		tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
		tx_flags |= I40E_TX_FLAGS_SW_VLAN;
	}

	/* Insert 802.1p priority into VLAN header */
	if ((tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED) &&
	    ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
	     (skb->priority != TC_PRIO_CONTROL))) {
		tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
		tx_flags |= (skb->priority & 0x7) <<
				I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
		if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
			struct vlan_ethhdr *vhdr;
			if (skb_header_cloned(skb) &&
			    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
				return -ENOMEM;
			vhdr = (struct vlan_ethhdr *)skb->data;
			vhdr->h_vlan_TCI = htons(tx_flags >>
						 I40E_TX_FLAGS_VLAN_SHIFT);
		} else {
			tx_flags |= I40E_TX_FLAGS_HW_VLAN;
		}
	}
	*flags = tx_flags;
	return 0;
}

/**
 * i40e_tso - set up the tso context descriptor
 * @tx_ring: ptr to the ring to send
 * @skb: ptr to the skb we're sending
 * @tx_flags: the collected send information
 * @protocol: the send protocol
 * @hdr_len: ptr to the size of the packet header
 * @cd_type_cmd_tso_mss: ptr to the quad word of the context descriptor
 * @cd_tunneling: ptr to context descriptor bits
 *
 * Returns 0 if no TSO can happen, 1 if tso is going, or error
 **/
static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
		    u32 tx_flags, __be16 protocol, u8 *hdr_len,
		    u64 *cd_type_cmd_tso_mss, u32 *cd_tunneling)
{
	u32 cd_cmd, cd_tso_len, cd_mss;
	struct tcphdr *tcph;
	struct iphdr *iph;
	u32 l4len;
	int err;
	struct ipv6hdr *ipv6h;

	if (!skb_is_gso(skb))
		return 0;

	if (skb_header_cloned(skb)) {
		err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (err)
			return err;
	}

	if (protocol == htons(ETH_P_IP)) {
		iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
		iph->tot_len = 0;
		iph->check = 0;
		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
						 0, IPPROTO_TCP, 0);
	} else if (skb_is_gso_v6(skb)) {

		ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb)
					   : ipv6_hdr(skb);
		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
		ipv6h->payload_len = 0;
		tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
					       0, IPPROTO_TCP, 0);
	}

	l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
	*hdr_len = (skb->encapsulation
		    ? (skb_inner_transport_header(skb) - skb->data)
		    : skb_transport_offset(skb)) + l4len;

	/* find the field values */
	cd_cmd = I40E_TX_CTX_DESC_TSO;
	cd_tso_len = skb->len - *hdr_len;
	cd_mss = skb_shinfo(skb)->gso_size;
	*cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
				((u64)cd_tso_len <<
				 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
				((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
	return 1;
}

/**
 * i40e_tsyn - set up the tsyn context descriptor
 * @tx_ring: ptr to the ring to send
 * @skb: ptr to the skb we're sending
 * @tx_flags: the collected send information
 * @cd_type_cmd_tso_mss: ptr to the quad word of the context descriptor
 *
 * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
 **/
static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
		     u32 tx_flags, u64 *cd_type_cmd_tso_mss)
{
	struct i40e_pf *pf;

	if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
		return 0;

	/* Tx timestamps cannot be sampled when doing TSO */
	if (tx_flags & I40E_TX_FLAGS_TSO)
		return 0;

	/* only timestamp the outbound packet if the user has requested it and
	 * we are not already transmitting a packet to be timestamped
	 */
	pf = i40e_netdev_to_pf(tx_ring->netdev);
	if (pf->ptp_tx && !pf->ptp_tx_skb) {
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
		pf->ptp_tx_skb = skb_get(skb);
	} else {
		return 0;
	}

	*cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
				I40E_TXD_CTX_QW1_CMD_SHIFT;

	pf->ptp_tx_start = jiffies;
	schedule_work(&pf->ptp_tx_work);

	return 1;
}

/**
 * i40e_tx_enable_csum - Enable Tx checksum offloads
 * @skb: send buffer
 * @tx_flags: Tx flags currently set
 * @td_cmd: Tx descriptor command bits to set
 * @td_offset: Tx descriptor header offsets to set
 * @tx_ring: Tx descriptor ring
 * @cd_tunneling: ptr to context desc bits
 **/
static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
				u32 *td_cmd, u32 *td_offset,
				struct i40e_ring *tx_ring,
				u32 *cd_tunneling)
{
	struct ipv6hdr *this_ipv6_hdr;
	unsigned int this_tcp_hdrlen;
	struct iphdr *this_ip_hdr;
	u32 network_hdr_len;
	u8 l4_hdr = 0;

	if (skb->encapsulation) {
		network_hdr_len = skb_inner_network_header_len(skb);
		this_ip_hdr = inner_ip_hdr(skb);
		this_ipv6_hdr = inner_ipv6_hdr(skb);
		this_tcp_hdrlen = inner_tcp_hdrlen(skb);

		if (tx_flags & I40E_TX_FLAGS_IPV4) {

			if (tx_flags & I40E_TX_FLAGS_TSO) {
				*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
				ip_hdr(skb)->check = 0;
			} else {
				*cd_tunneling |=
					I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
			}
		} else if (tx_flags & I40E_TX_FLAGS_IPV6) {
			if (tx_flags & I40E_TX_FLAGS_TSO) {
				*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
				ip_hdr(skb)->check = 0;
			} else {
				*cd_tunneling |=
					I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
			}
		}

		/* Now set the ctx descriptor fields */
		*cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
				 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
				 I40E_TXD_CTX_UDP_TUNNELING |
				 ((skb_inner_network_offset(skb) -
				   skb_transport_offset(skb)) >> 1) <<
				 I40E_TXD_CTX_QW0_NATLEN_SHIFT;

	} else {
		network_hdr_len = skb_network_header_len(skb);
		this_ip_hdr = ip_hdr(skb);
		this_ipv6_hdr = ipv6_hdr(skb);
		this_tcp_hdrlen = tcp_hdrlen(skb);
	}

	/* Enable IP checksum offloads */
	if (tx_flags & I40E_TX_FLAGS_IPV4) {
		l4_hdr = this_ip_hdr->protocol;
		/* the stack computes the IP header already, the only time we
		 * need the hardware to recompute it is in the case of TSO.
		 */
		if (tx_flags & I40E_TX_FLAGS_TSO) {
			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
			this_ip_hdr->check = 0;
		} else {
			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
		}
		/* Now set the td_offset for IP header length */
		*td_offset = (network_hdr_len >> 2) <<
			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
	} else if (tx_flags & I40E_TX_FLAGS_IPV6) {
		l4_hdr = this_ipv6_hdr->nexthdr;
		*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
		/* Now set the td_offset for IP header length */
		*td_offset = (network_hdr_len >> 2) <<
			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
	}
	/* words in MACLEN + dwords in IPLEN + dwords in L4Len */
	*td_offset |= (skb_network_offset(skb) >> 1) <<
		       I40E_TX_DESC_LENGTH_MACLEN_SHIFT;

	/* Enable L4 checksum offloads */
	switch (l4_hdr) {
	case IPPROTO_TCP:
		/* enable checksum offloads */
		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
		*td_offset |= (this_tcp_hdrlen >> 2) <<
			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		break;
	case IPPROTO_SCTP:
		/* enable SCTP checksum offload */
		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
		*td_offset |= (sizeof(struct sctphdr) >> 2) <<
			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		break;
	case IPPROTO_UDP:
		/* enable UDP checksum offload */
		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
		*td_offset |= (sizeof(struct udphdr) >> 2) <<
			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		break;
	default:
		break;
	}
}

/**
 * i40e_create_tx_ctx - Build the Tx context descriptor
 * @tx_ring: ring to create the descriptor on
 * @cd_type_cmd_tso_mss: Quad Word 1
 * @cd_tunneling: Quad Word 0 - bits 0-31
 * @cd_l2tag2: Quad Word 0 - bits 32-63
 **/
static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
			       const u64 cd_type_cmd_tso_mss,
			       const u32 cd_tunneling, const u32 cd_l2tag2)
{
	struct i40e_tx_context_desc *context_desc;
	int i = tx_ring->next_to_use;

	if (!cd_type_cmd_tso_mss && !cd_tunneling && !cd_l2tag2)
		return;

	/* grab the next descriptor */
	context_desc = I40E_TX_CTXTDESC(tx_ring, i);

	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	/* cpu_to_le32 and assign to struct fields */
	context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
	context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
}

/**
 * i40e_tx_map - Build the Tx descriptor
 * @tx_ring: ring to send buffer on
 * @skb: send buffer
 * @first: first buffer info buffer to use
 * @tx_flags: collected send information
 * @hdr_len: size of the packet header
 * @td_cmd: the command field in the descriptor
 * @td_offset: offset for checksum or crc
 **/
static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
			struct i40e_tx_buffer *first, u32 tx_flags,
			const u8 hdr_len, u32 td_cmd, u32 td_offset)
{
	unsigned int data_len = skb->data_len;
	unsigned int size = skb_headlen(skb);
	struct skb_frag_struct *frag;
	struct i40e_tx_buffer *tx_bi;
	struct i40e_tx_desc *tx_desc;
	u16 i = tx_ring->next_to_use;
	u32 td_tag = 0;
	dma_addr_t dma;
	u16 gso_segs;

	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
		td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
			 I40E_TX_FLAGS_VLAN_SHIFT;
	}

	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
		gso_segs = skb_shinfo(skb)->gso_segs;
	else
		gso_segs = 1;

	/* multiply data chunks by size of headers */
	first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
	first->gso_segs = gso_segs;
	first->skb = skb;
	first->tx_flags = tx_flags;

	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);

	tx_desc = I40E_TX_DESC(tx_ring, i);
	tx_bi = first;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		/* record length, and DMA address */
		dma_unmap_len_set(tx_bi, len, size);
		dma_unmap_addr_set(tx_bi, dma, dma);

		tx_desc->buffer_addr = cpu_to_le64(dma);

		while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
			tx_desc->cmd_type_offset_bsz =
				build_ctob(td_cmd, td_offset,
					   I40E_MAX_DATA_PER_TXD, td_tag);

			tx_desc++;
			i++;
			if (i == tx_ring->count) {
				tx_desc = I40E_TX_DESC(tx_ring, 0);
				i = 0;
			}

			dma += I40E_MAX_DATA_PER_TXD;
			size -= I40E_MAX_DATA_PER_TXD;

			tx_desc->buffer_addr = cpu_to_le64(dma);
		}

		if (likely(!data_len))
			break;

		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
							  size, td_tag);

		tx_desc++;
		i++;
		if (i == tx_ring->count) {
			tx_desc = I40E_TX_DESC(tx_ring, 0);
			i = 0;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
				       DMA_TO_DEVICE);

		tx_bi = &tx_ring->tx_bi[i];
	}

	tx_desc->cmd_type_offset_bsz =
		build_ctob(td_cmd, td_offset, size, td_tag) |
		cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);

	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
						 tx_ring->queue_index),
			     first->bytecount);

	/* set the timestamp */
	first->time_stamp = jiffies;

	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch. (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64).
	 */
	wmb();

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	i++;
	if (i == tx_ring->count)
		i = 0;

	tx_ring->next_to_use = i;

	/* notify HW of packet */
	writel(i, tx_ring->tail);

	return;

dma_error:
	dev_info(tx_ring->dev, "TX DMA map failed\n");

	/* clear dma mappings for failed tx_bi map */
	for (;;) {
		tx_bi = &tx_ring->tx_bi[i];
		i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
		if (tx_bi == first)
			break;
		if (i == 0)
			i = tx_ring->count;
		i--;
	}

	tx_ring->next_to_use = i;
}

/**
 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
 * @tx_ring: the ring to be checked
 * @size: the size buffer we want to assure is available
 *
 * Returns -EBUSY if a stop is needed, else 0
 **/
static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
{
	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
	/* Memory barrier before checking head and tail */
	smp_mb();

	/* Check again in a case another CPU has just made room available. */
	if (likely(I40E_DESC_UNUSED(tx_ring) < size))
		return -EBUSY;

	/* A reprieve! - use start_queue because it doesn't call schedule */
	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
	++tx_ring->tx_stats.restart_queue;
	return 0;
}

/**
 * i40e_maybe_stop_tx - 1st level check for tx stop conditions
 * @tx_ring: the ring to be checked
 * @size: the size buffer we want to assure is available
 *
 * Returns 0 if stop is not needed
 **/
static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
{
	if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
		return 0;
	return __i40e_maybe_stop_tx(tx_ring, size);
}

/**
 * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
 * @skb: send buffer
 * @tx_ring: ring to send buffer on
 *
 * Returns number of data descriptors needed for this skb. Returns 0 if there
 * are not enough descriptors available in this ring since we need at least
 * one descriptor.
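 *
 * For example, with PAGE_SIZE <= I40E_MAX_DATA_PER_TXD, a linear area that
 * fits in one descriptor plus three page fragments needs 1 + 3 data
 * descriptors; the check below then reserves three extra slots for the
 * context descriptor and the gap kept between tail and head before deciding
 * whether to stop the queue.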
 **/
static int i40e_xmit_descriptor_count(struct sk_buff *skb,
				      struct i40e_ring *tx_ring)
{
#if PAGE_SIZE > I40E_MAX_DATA_PER_TXD
	unsigned int f;
#endif
	int count = 0;

	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
	 *       + 2 desc gap to keep tail from touching head,
	 *       + 1 desc for context descriptor,
	 * otherwise try next time
	 */
#if PAGE_SIZE > I40E_MAX_DATA_PER_TXD
	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
#else
	count += skb_shinfo(skb)->nr_frags;
#endif
	count += TXD_USE_COUNT(skb_headlen(skb));
	if (i40e_maybe_stop_tx(tx_ring, count + 3)) {
		tx_ring->tx_stats.tx_busy++;
		return 0;
	}
	return count;
}

/**
 * i40e_xmit_frame_ring - Sends buffer on Tx ring
 * @skb: send buffer
 * @tx_ring: ring to send buffer on
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 **/
static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
					struct i40e_ring *tx_ring)
{
	u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
	u32 cd_tunneling = 0, cd_l2tag2 = 0;
	struct i40e_tx_buffer *first;
	u32 td_offset = 0;
	u32 tx_flags = 0;
	__be16 protocol;
	u32 td_cmd = 0;
	u8 hdr_len = 0;
	int tsyn;
	int tso;
	if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
		return NETDEV_TX_BUSY;

	/* prepare the xmit flags */
	if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
		goto out_drop;

	/* obtain protocol of skb */
	protocol = skb->protocol;

	/* record the location of the first descriptor for this packet */
	first = &tx_ring->tx_bi[tx_ring->next_to_use];

	/* setup IPv4/IPv6 offloads */
	if (protocol == htons(ETH_P_IP))
		tx_flags |= I40E_TX_FLAGS_IPV4;
	else if (protocol == htons(ETH_P_IPV6))
		tx_flags |= I40E_TX_FLAGS_IPV6;

	tso = i40e_tso(tx_ring, skb, tx_flags, protocol, &hdr_len,
		       &cd_type_cmd_tso_mss, &cd_tunneling);

	if (tso < 0)
		goto out_drop;
	else if (tso)
		tx_flags |= I40E_TX_FLAGS_TSO;

	skb_tx_timestamp(skb);

	tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);

	if (tsyn)
		tx_flags |= I40E_TX_FLAGS_TSYN;

	/* always enable CRC insertion offload */
	td_cmd |= I40E_TX_DESC_CMD_ICRC;

	/* Always offload the checksum, since it's in the data descriptor */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		tx_flags |= I40E_TX_FLAGS_CSUM;

		i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset,
				    tx_ring, &cd_tunneling);
	}

	i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
			   cd_tunneling, cd_l2tag2);

	/* Add Flow Director ATR if it's enabled.
	 *
	 * NOTE: this must always be directly before the data descriptor.
	 */
	i40e_atr(tx_ring, skb, tx_flags, protocol);

	i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
		    td_cmd, td_offset);

	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);

	return NETDEV_TX_OK;

out_drop:
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

/**
 * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
 * @skb: send buffer
 * @netdev: network interface device structure
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 **/
netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
	struct i40e_netdev_priv *np = netdev_priv(netdev);
	struct i40e_vsi *vsi = np->vsi;
	struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];

	/* hardware can't handle really short frames, hardware padding works
	 * beyond this point
	 */
	if (unlikely(skb->len < I40E_MIN_TX_LEN)) {
		if (skb_pad(skb, I40E_MIN_TX_LEN - skb->len))
			return NETDEV_TX_OK;
		skb->len = I40E_MIN_TX_LEN;
		skb_set_tail_pointer(skb, I40E_MIN_TX_LEN);
	}

	return i40e_xmit_frame_ring(skb, tx_ring);
}