/*******************************************************************************
 *
 * Intel Ethernet Controller XL710 Family Linux Driver
 * Copyright(c) 2013 - 2016 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Contact Information:
 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
 *
 ******************************************************************************/

#include <linux/prefetch.h>
#include <net/busy_poll.h>
#include "i40e.h"
#include "i40e_prototype.h"

static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
				u32 td_tag)
{
	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
			   ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
			   ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
			   ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
}

#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
/**
 * i40e_fdir - Generate a Flow Director descriptor based on fdata
 * @tx_ring: Tx ring to send buffer on
 * @fdata: Flow director filter data
 * @add: Indicate if we are adding a rule or deleting one
 *
 **/
static void i40e_fdir(struct i40e_ring *tx_ring,
		      struct i40e_fdir_filter *fdata, bool add)
{
	struct i40e_filter_program_desc *fdir_desc;
	struct i40e_pf *pf = tx_ring->vsi->back;
	u32 flex_ptype, dtype_cmd;
	u16 i;

	/* grab the next descriptor */
	i = tx_ring->next_to_use;
	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);

	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	flex_ptype = I40E_TXD_FLTR_QW0_QINDEX_MASK &
		     (fdata->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT);

	flex_ptype |= I40E_TXD_FLTR_QW0_FLEXOFF_MASK &
		      (fdata->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT);

	flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK &
		      (fdata->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);

	/* Use LAN VSI Id if not programmed by user */
	flex_ptype |= I40E_TXD_FLTR_QW0_DEST_VSI_MASK &
		      ((u32)(fdata->dest_vsi ? : pf->vsi[pf->lan_vsi]->id) <<
		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT);

	dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;

	dtype_cmd |= add ?
		     I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
		     I40E_TXD_FLTR_QW1_PCMD_SHIFT :
		     I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
		     I40E_TXD_FLTR_QW1_PCMD_SHIFT;

	dtype_cmd |= I40E_TXD_FLTR_QW1_DEST_MASK &
		     (fdata->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT);

	dtype_cmd |= I40E_TXD_FLTR_QW1_FD_STATUS_MASK &
		     (fdata->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT);

	if (fdata->cnt_index) {
		dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
		dtype_cmd |= I40E_TXD_FLTR_QW1_CNTINDEX_MASK &
			     ((u32)fdata->cnt_index <<
			      I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT);
	}

	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
	fdir_desc->rsvd = cpu_to_le32(0);
	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
	fdir_desc->fd_id = cpu_to_le32(fdata->fd_id);
}

#define I40E_FD_CLEAN_DELAY 10
/**
 * i40e_program_fdir_filter - Program a Flow Director filter
 * @fdir_data: Packet data that will be filter parameters
 * @raw_packet: the pre-allocated packet buffer for FDir
 * @pf: The PF pointer
 * @add: True for add/update, False for remove
 **/
static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data,
				    u8 *raw_packet, struct i40e_pf *pf,
				    bool add)
{
	struct i40e_tx_buffer *tx_buf, *first;
	struct i40e_tx_desc *tx_desc;
	struct i40e_ring *tx_ring;
	struct i40e_vsi *vsi;
	struct device *dev;
	dma_addr_t dma;
	u32 td_cmd = 0;
	u16 i;

	/* find existing FDIR VSI */
	vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
	if (!vsi)
		return -ENOENT;

	tx_ring = vsi->tx_rings[0];
	dev = tx_ring->dev;

	/* we need two descriptors to add/del a filter and we can wait */
	for (i = I40E_FD_CLEAN_DELAY; I40E_DESC_UNUSED(tx_ring) < 2; i--) {
		if (!i)
			return -EAGAIN;
		msleep_interruptible(1);
	}

	dma = dma_map_single(dev, raw_packet,
			     I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, dma))
		goto dma_fail;

	/* grab the next descriptor */
	i = tx_ring->next_to_use;
	first = &tx_ring->tx_bi[i];
	i40e_fdir(tx_ring, fdir_data, add);

	/* Now program a dummy descriptor */
	i = tx_ring->next_to_use;
	tx_desc = I40E_TX_DESC(tx_ring, i);
	tx_buf = &tx_ring->tx_bi[i];

	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;

	memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));

	/* record length, and DMA address */
	dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
	dma_unmap_addr_set(tx_buf, dma, dma);

	tx_desc->buffer_addr = cpu_to_le64(dma);
	td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;

	tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
	tx_buf->raw_buf = (void *)raw_packet;

	tx_desc->cmd_type_offset_bsz =
		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);

	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.
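	 * (Only relevant on weakly-ordered memory-model archs; the tail
	 * write below is what actually tells the hardware about the new
	 * descriptors.)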
	 */
	wmb();

	/* Mark the data descriptor to be watched */
	first->next_to_watch = tx_desc;

	writel(tx_ring->next_to_use, tx_ring->tail);
	return 0;

dma_fail:
	return -1;
}

#define IP_HEADER_OFFSET 14
#define I40E_UDPIP_DUMMY_PACKET_LEN 42
/**
 * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
				   struct i40e_fdir_filter *fd_data,
				   bool add)
{
	struct i40e_pf *pf = vsi->back;
	struct udphdr *udp;
	struct iphdr *ip;
	bool err = false;
	u8 *raw_packet;
	int ret;
	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
		0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
	if (!raw_packet)
		return -ENOMEM;
	memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);

	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
	udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
	      + sizeof(struct iphdr));

	ip->daddr = fd_data->dst_ip[0];
	udp->dest = fd_data->dst_port;
	ip->saddr = fd_data->src_ip[0];
	udp->source = fd_data->src_port;

	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
	if (ret) {
		dev_info(&pf->pdev->dev,
			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
			 fd_data->pctype, fd_data->fd_id, ret);
		err = true;
	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
		if (add)
			dev_info(&pf->pdev->dev,
				 "Filter OK for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
		else
			dev_info(&pf->pdev->dev,
				 "Filter deleted for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
	}
	if (err)
		kfree(raw_packet);

	return err ?
		     -EOPNOTSUPP : 0;
}

#define I40E_TCPIP_DUMMY_PACKET_LEN 54
/**
 * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
				   struct i40e_fdir_filter *fd_data,
				   bool add)
{
	struct i40e_pf *pf = vsi->back;
	struct tcphdr *tcp;
	struct iphdr *ip;
	bool err = false;
	u8 *raw_packet;
	int ret;
	/* Dummy packet */
	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
		0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
		0x0, 0x72, 0, 0, 0, 0};

	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
	if (!raw_packet)
		return -ENOMEM;
	memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);

	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
	tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
	      + sizeof(struct iphdr));

	ip->daddr = fd_data->dst_ip[0];
	tcp->dest = fd_data->dst_port;
	ip->saddr = fd_data->src_ip[0];
	tcp->source = fd_data->src_port;

	if (add) {
		pf->fd_tcp_rule++;
		if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
		    I40E_DEBUG_FD & pf->hw.debug_mask)
			dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
		pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
	} else {
		pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
				  (pf->fd_tcp_rule - 1) : 0;
		if (pf->fd_tcp_rule == 0) {
			if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
			    I40E_DEBUG_FD & pf->hw.debug_mask)
				dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
			pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
		}
	}

	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);

	if (ret) {
		dev_info(&pf->pdev->dev,
			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
			 fd_data->pctype, fd_data->fd_id, ret);
		err = true;
	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
		if (add)
			dev_info(&pf->pdev->dev,
				 "Filter OK for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
		else
			dev_info(&pf->pdev->dev,
				 "Filter deleted for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
	}

	if (err)
		kfree(raw_packet);

	return err ?
		     -EOPNOTSUPP : 0;
}

#define I40E_IP_DUMMY_PACKET_LEN 34
/**
 * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
 * a specific flow spec
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
				  struct i40e_fdir_filter *fd_data,
				  bool add)
{
	struct i40e_pf *pf = vsi->back;
	struct iphdr *ip;
	bool err = false;
	u8 *raw_packet;
	int ret;
	int i;
	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
		0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0};

	for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
	     i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
		raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
		if (!raw_packet)
			return -ENOMEM;
		memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
		ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);

		ip->saddr = fd_data->src_ip[0];
		ip->daddr = fd_data->dst_ip[0];
		ip->protocol = 0;

		fd_data->pctype = i;
		ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);

		if (ret) {
			dev_info(&pf->pdev->dev,
				 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
				 fd_data->pctype, fd_data->fd_id, ret);
			err = true;
		} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
			if (add)
				dev_info(&pf->pdev->dev,
					 "Filter OK for PCTYPE %d loc = %d\n",
					 fd_data->pctype, fd_data->fd_id);
			else
				dev_info(&pf->pdev->dev,
					 "Filter deleted for PCTYPE %d loc = %d\n",
					 fd_data->pctype, fd_data->fd_id);
		}
	}

	if (err)
		kfree(raw_packet);

	return err ? -EOPNOTSUPP : 0;
}

/**
 * i40e_add_del_fdir - Build raw packets to add/del fdir filter
 * @vsi: pointer to the targeted VSI
 * @input: the flow director filter to add or delete
 * @add: true adds a filter, false removes it
 *
 **/
int i40e_add_del_fdir(struct i40e_vsi *vsi,
		      struct i40e_fdir_filter *input, bool add)
{
	struct i40e_pf *pf = vsi->back;
	int ret;

	switch (input->flow_type & ~FLOW_EXT) {
	case TCP_V4_FLOW:
		ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
		break;
	case UDP_V4_FLOW:
		ret = i40e_add_del_fdir_udpv4(vsi, input, add);
		break;
	case IP_USER_FLOW:
		switch (input->ip4_proto) {
		case IPPROTO_TCP:
			ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
			break;
		case IPPROTO_UDP:
			ret = i40e_add_del_fdir_udpv4(vsi, input, add);
			break;
		case IPPROTO_IP:
			ret = i40e_add_del_fdir_ipv4(vsi, input, add);
			break;
		default:
			/* We cannot support masking based on protocol */
			goto unsupported_flow;
		}
		break;
	default:
unsupported_flow:
		dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
			 input->flow_type);
		ret = -EINVAL;
	}

	/* The buffer allocated here will normally be freed by
	 * i40e_clean_fdir_tx_irq() as it reclaims resources after transmit
	 * completion. In the event of an error adding the buffer to the FDIR
	 * ring, it will immediately be freed. It may also be freed by
	 * i40e_clean_tx_ring() when closing the VSI.
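	 * (i40e_unmap_and_free_tx_resource() kfree()s raw_buf for buffers
	 * flagged I40E_TX_FLAGS_FD_SB instead of freeing an skb.)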
	 */
	return ret;
}

/**
 * i40e_fd_handle_status - check the Programming Status for FD
 * @rx_ring: the Rx ring for this descriptor
 * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
 * @prog_id: the id originally used for programming
 *
 * This is used to verify if the FD programming or invalidation
 * requested by SW to the HW is successful or not and take actions accordingly.
 **/
static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
				  union i40e_rx_desc *rx_desc, u8 prog_id)
{
	struct i40e_pf *pf = rx_ring->vsi->back;
	struct pci_dev *pdev = pf->pdev;
	u32 fcnt_prog, fcnt_avail;
	u32 error;
	u64 qw;

	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;

	if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
		pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
		if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
		    (I40E_DEBUG_FD & pf->hw.debug_mask))
			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
				 pf->fd_inv);

		/* Check if the programming error is for ATR.
		 * If so, auto disable ATR and set a state for
		 * flush in progress. Next time we come here if flush is in
		 * progress do nothing, once flush is complete the state will
		 * be cleared.
		 */
		if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
			return;

		pf->fd_add_err++;
		/* store the current atr filter count */
		pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);

		if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
		    (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
			pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
			set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
		}

		/* filter programming failed most likely due to table full */
		fcnt_prog = i40e_get_global_fd_count(pf);
		fcnt_avail = pf->fdir_pf_filter_count;
		/* If ATR is running fcnt_prog can quickly change,
		 * if we are very close to full, it makes sense to disable
		 * FD ATR/SB and then re-enable it when there is room.
		 */
		if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
			if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
			    !(pf->auto_disable_flags &
			      I40E_FLAG_FD_SB_ENABLED)) {
				if (I40E_DEBUG_FD & pf->hw.debug_mask)
					dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
				pf->auto_disable_flags |=
							I40E_FLAG_FD_SB_ENABLED;
			}
		}
	} else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
		if (I40E_DEBUG_FD & pf->hw.debug_mask)
			dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
				 rx_desc->wb.qword0.hi_dword.fd_id);
	}
}

/**
 * i40e_unmap_and_free_tx_resource - Release a Tx buffer
 * @ring: the ring that owns the buffer
 * @tx_buffer: the buffer to free
 **/
static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
					    struct i40e_tx_buffer *tx_buffer)
{
	if (tx_buffer->skb) {
		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
			kfree(tx_buffer->raw_buf);
		else
			dev_kfree_skb_any(tx_buffer->skb);
		if (dma_unmap_len(tx_buffer, len))
			dma_unmap_single(ring->dev,
					 dma_unmap_addr(tx_buffer, dma),
					 dma_unmap_len(tx_buffer, len),
					 DMA_TO_DEVICE);
	} else if (dma_unmap_len(tx_buffer, len)) {
		dma_unmap_page(ring->dev,
			       dma_unmap_addr(tx_buffer, dma),
			       dma_unmap_len(tx_buffer, len),
			       DMA_TO_DEVICE);
	}

	tx_buffer->next_to_watch = NULL;
	tx_buffer->skb = NULL;
	dma_unmap_len_set(tx_buffer, len, 0);
	/* tx_buffer must be completely set up in the transmit path */
}

/**
 * i40e_clean_tx_ring - Free any empty Tx buffers
 * @tx_ring: ring to be cleaned
 **/
void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
{
	unsigned long bi_size;
	u16 i;

	/* ring already cleared, nothing to do */
	if (!tx_ring->tx_bi)
		return;

	/* Free all the Tx ring sk_buffs */
	for (i = 0; i < tx_ring->count; i++)
		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);

	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
	memset(tx_ring->tx_bi, 0, bi_size);

	/* Zero out the descriptor ring */
	memset(tx_ring->desc, 0, tx_ring->size);

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;

	if (!tx_ring->netdev)
		return;

	/* cleanup Tx queue statistics */
	netdev_tx_reset_queue(txring_txq(tx_ring));
}

/**
 * i40e_free_tx_resources - Free Tx resources per queue
 * @tx_ring: Tx descriptor ring for a specific queue
 *
 * Free all transmit software resources
 **/
void i40e_free_tx_resources(struct i40e_ring *tx_ring)
{
	i40e_clean_tx_ring(tx_ring);
	kfree(tx_ring->tx_bi);
	tx_ring->tx_bi = NULL;

	if (tx_ring->desc) {
		dma_free_coherent(tx_ring->dev, tx_ring->size,
				  tx_ring->desc, tx_ring->dma);
		tx_ring->desc = NULL;
	}
}

/**
 * i40e_get_tx_pending - how many tx descriptors not processed
 * @ring: the ring of descriptors
 * @in_sw: is tx_pending being checked in SW or HW
 *
 * Since there is no access to the ring head register
 * in XL710, we need to use our local copies
 **/
u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
{
	u32 head, tail;

	if (!in_sw)
		head = i40e_get_head(ring);
	else
		head = ring->next_to_clean;
	tail = readl(ring->tail);

	if (head != tail)
		return (head < tail) ?
			tail - head : (tail + ring->count - head);

	return 0;
}

#define WB_STRIDE 4

/**
 * i40e_clean_tx_irq - Reclaim resources after transmit completes
 * @vsi: the VSI we care about
 * @tx_ring: Tx ring to clean
 * @napi_budget: Used to determine if we are in netpoll
 *
 * Returns true if there's any budget left (e.g. the clean is finished)
 **/
static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
			      struct i40e_ring *tx_ring, int napi_budget)
{
	u16 i = tx_ring->next_to_clean;
	struct i40e_tx_buffer *tx_buf;
	struct i40e_tx_desc *tx_head;
	struct i40e_tx_desc *tx_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int budget = vsi->work_limit;

	tx_buf = &tx_ring->tx_bi[i];
	tx_desc = I40E_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));

	do {
		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		read_barrier_depends();

		/* we have caught up to head, no work left to do */
		if (tx_head == tx_desc)
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buf->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buf->bytecount;
		total_packets += tx_buf->gso_segs;

		/* free the skb */
		napi_consume_skb(tx_buf->skb, napi_budget);

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 dma_unmap_addr(tx_buf, dma),
				 dma_unmap_len(tx_buf, len),
				 DMA_TO_DEVICE);

		/* clear tx_buffer data */
		tx_buf->skb = NULL;
		dma_unmap_len_set(tx_buf, len, 0);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {

			tx_buf++;
			tx_desc++;
			i++;
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buf = tx_ring->tx_bi;
				tx_desc = I40E_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buf, len)) {
				dma_unmap_page(tx_ring->dev,
					       dma_unmap_addr(tx_buf, dma),
					       dma_unmap_len(tx_buf, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buf, len, 0);
			}
		}

		/* move us one more past the eop_desc for start of next pkt */
		tx_buf++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buf = tx_ring->tx_bi;
			tx_desc = I40E_TX_DESC(tx_ring, 0);
		}

		prefetch(tx_desc);

		/* update budget accounting */
		budget--;
	} while (likely(budget));

	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->syncp);
	tx_ring->q_vector->tx.total_bytes += total_bytes;
	tx_ring->q_vector->tx.total_packets += total_packets;

	if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
		/* check to see if there are < 4 descriptors
		 * waiting to be written back, then kick the hardware to force
		 * them to be written back in case we stay in NAPI.
		 * In this mode on X722 we do not enable Interrupt.
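		 * Setting arm_wb here only flags the ring; the actual kick
		 * happens when i40e_napi_poll() sees the flag and calls
		 * i40e_enable_wb_on_itr().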
		 */
		unsigned int j = i40e_get_tx_pending(tx_ring, false);

		if (budget &&
		    ((j / WB_STRIDE) == 0) && (j > 0) &&
		    !test_bit(__I40E_DOWN, &vsi->state) &&
		    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
			tx_ring->arm_wb = true;
	}

	/* notify netdev of completed buffers */
	netdev_tx_completed_queue(txring_txq(tx_ring),
				  total_packets, total_bytes);

#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !test_bit(__I40E_DOWN, &vsi->state)) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);
			++tx_ring->tx_stats.restart_queue;
		}
	}

	return !!budget;
}

/**
 * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
 * @vsi: the VSI we care about
 * @q_vector: the vector on which to enable writeback
 *
 **/
static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
				  struct i40e_q_vector *q_vector)
{
	u16 flags = q_vector->tx.ring[0].flags;
	u32 val;

	if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR))
		return;

	if (q_vector->arm_wb_state)
		return;

	if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
		val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK |
		      I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */

		wr32(&vsi->back->hw,
		     I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
		     val);
	} else {
		val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK |
		      I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */

		wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
	}
	q_vector->arm_wb_state = true;
}

/**
 * i40e_force_wb - Issue SW Interrupt so HW does a wb
 * @vsi: the VSI we care about
 * @q_vector: the vector on which to force writeback
 *
 **/
void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
{
	if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
		u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
			  I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
			  I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
			  I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
			  /* allow 00 to be written to the index */

		wr32(&vsi->back->hw,
		     I40E_PFINT_DYN_CTLN(q_vector->v_idx +
					 vsi->base_vector - 1), val);
	} else {
		u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
			  I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
			  I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
			  I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
			  /* allow 00 to be written to the index */

		wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
	}
}

/**
 * i40e_set_new_dynamic_itr - Find new ITR level
 * @rc: structure containing ring performance data
 *
 * Returns true if ITR changed, false if not
 *
 * Stores a new ITR value based on packets and byte counts during
 * the last interrupt.  The advantage of per interrupt computation
 * is faster updates and more accurate ITR for the current traffic
 * pattern.  Constants in this function were computed based on
 * theoretical maximum wire speed and thresholds were set based on
 * testing data as well as attempting to minimize response time
 * while increasing bulk throughput.
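 *
 * The resulting latency range is then mapped to a fixed ITR value:
 * lowest -> 50K ints/s, low -> 20K, bulk -> 18K, ultra -> 8K.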
 **/
static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
{
	enum i40e_latency_range new_latency_range = rc->latency_range;
	struct i40e_q_vector *qv = rc->ring->q_vector;
	u32 new_itr = rc->itr;
	int bytes_per_int;
	int usecs;

	if (rc->total_packets == 0 || !rc->itr)
		return false;

	/* simple throttle rate management
	 *   0-10MB/s   lowest (50000 ints/s)
	 *  10-20MB/s   low    (20000 ints/s)
	 *  20-1249MB/s bulk   (18000 ints/s)
	 *  > 40000 Rx packets per second (8000 ints/s)
	 *
	 * The math works out because the divisor is in 10^(-6) which
	 * turns the bytes/us input value into MB/s values, but
	 * make sure to use usecs, as the register values written
	 * are in 2 usec increments in the ITR registers, and make sure
	 * to use the smoothed values that the countdown timer gives us.
	 */
	usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
	bytes_per_int = rc->total_bytes / usecs;

	switch (new_latency_range) {
	case I40E_LOWEST_LATENCY:
		if (bytes_per_int > 10)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	case I40E_LOW_LATENCY:
		if (bytes_per_int > 20)
			new_latency_range = I40E_BULK_LATENCY;
		else if (bytes_per_int <= 10)
			new_latency_range = I40E_LOWEST_LATENCY;
		break;
	case I40E_BULK_LATENCY:
	case I40E_ULTRA_LATENCY:
	default:
		if (bytes_per_int <= 20)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	}

	/* this is to adjust RX more aggressively when streaming small
	 * packets.  The value of 40000 was picked as it is just beyond
	 * what the hardware can receive per second if in low latency
	 * mode.
	 */
#define RX_ULTRA_PACKET_RATE 40000

	if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) &&
	    (&qv->rx == rc))
		new_latency_range = I40E_ULTRA_LATENCY;

	rc->latency_range = new_latency_range;

	switch (new_latency_range) {
	case I40E_LOWEST_LATENCY:
		new_itr = I40E_ITR_50K;
		break;
	case I40E_LOW_LATENCY:
		new_itr = I40E_ITR_20K;
		break;
	case I40E_BULK_LATENCY:
		new_itr = I40E_ITR_18K;
		break;
	case I40E_ULTRA_LATENCY:
		new_itr = I40E_ITR_8K;
		break;
	default:
		break;
	}

	rc->total_bytes = 0;
	rc->total_packets = 0;

	if (new_itr != rc->itr) {
		rc->itr = new_itr;
		return true;
	}

	return false;
}

/**
 * i40e_clean_programming_status - clean the programming status descriptor
 * @rx_ring: the rx ring that has this descriptor
 * @rx_desc: the rx descriptor written back by HW
 *
 * Flow director should handle FD_FILTER_STATUS to check its filter programming
 * status being successful or not and take actions accordingly. FCoE should
 * handle its context/filter programming/invalidation status and take actions.
 *
 **/
static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
					  union i40e_rx_desc *rx_desc)
{
	u64 qw;
	u8 id;

	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
	     I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;

	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
		i40e_fd_handle_status(rx_ring, rx_desc, id);
#ifdef I40E_FCOE
	else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
		 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
		i40e_fcoe_handle_status(rx_ring, rx_desc, id);
#endif
}

/**
 * i40e_setup_tx_descriptors - Allocate the Tx descriptors
 * @tx_ring: the tx ring to set up
 *
 * Return 0 on success, negative on error
 **/
int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
{
	struct device *dev = tx_ring->dev;
	int bi_size;

	if (!dev)
		return -ENOMEM;

	/* warn if we are about to overwrite the pointer */
	WARN_ON(tx_ring->tx_bi);
	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
	if (!tx_ring->tx_bi)
		goto err;

	/* round up to nearest 4K */
	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
	/* add u32 for head writeback, align after this takes care of
	 * guaranteeing this is at least one cache line in size
	 */
	tx_ring->size += sizeof(u32);
	tx_ring->size = ALIGN(tx_ring->size, 4096);
	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
					   &tx_ring->dma, GFP_KERNEL);
	if (!tx_ring->desc) {
		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
			 tx_ring->size);
		goto err;
	}

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
	return 0;

err:
	kfree(tx_ring->tx_bi);
	tx_ring->tx_bi = NULL;
	return -ENOMEM;
}

/**
 * i40e_clean_rx_ring - Free Rx buffers
 * @rx_ring: ring to be cleaned
 **/
void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	unsigned long bi_size;
	u16 i;

	/* ring already cleared, nothing to do */
	if (!rx_ring->rx_bi)
		return;

	if (rx_ring->skb) {
		dev_kfree_skb(rx_ring->skb);
		rx_ring->skb = NULL;
	}

	/* Free all the Rx ring sk_buffs */
	for (i = 0; i < rx_ring->count; i++) {
		struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];

		if (!rx_bi->page)
			continue;

		dma_unmap_page(dev, rx_bi->dma, PAGE_SIZE, DMA_FROM_DEVICE);
		__free_pages(rx_bi->page, 0);

		rx_bi->page = NULL;
		rx_bi->page_offset = 0;
	}

	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
	memset(rx_ring->rx_bi, 0, bi_size);

	/* Zero out the descriptor ring */
	memset(rx_ring->desc, 0, rx_ring->size);

	rx_ring->next_to_alloc = 0;
	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;
}

/**
 * i40e_free_rx_resources - Free Rx resources
 * @rx_ring: ring to clean the resources from
 *
 * Free all receive software resources
 **/
void i40e_free_rx_resources(struct i40e_ring *rx_ring)
{
	i40e_clean_rx_ring(rx_ring);
	kfree(rx_ring->rx_bi);
	rx_ring->rx_bi = NULL;

	if (rx_ring->desc) {
		dma_free_coherent(rx_ring->dev, rx_ring->size,
				  rx_ring->desc, rx_ring->dma);
		rx_ring->desc = NULL;
	}
}

/**
 * i40e_setup_rx_descriptors - Allocate Rx descriptors
 * @rx_ring: Rx descriptor ring (for a specific queue) to setup
 *
 * Returns 0 on success, negative on failure
 **/
int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	int bi_size;

	/* warn if we are about to overwrite the pointer */
	WARN_ON(rx_ring->rx_bi);
	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
	if (!rx_ring->rx_bi)
		goto err;

	u64_stats_init(&rx_ring->syncp);

	/* Round up to nearest 4K */
	rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc);
	rx_ring->size = ALIGN(rx_ring->size, 4096);
	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
					   &rx_ring->dma, GFP_KERNEL);

	if (!rx_ring->desc) {
		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
			 rx_ring->size);
		goto err;
	}

	rx_ring->next_to_alloc = 0;
	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;

	return 0;
err:
	kfree(rx_ring->rx_bi);
	rx_ring->rx_bi = NULL;
	return -ENOMEM;
}

/**
 * i40e_release_rx_desc - Store the new tail and head values
 * @rx_ring: ring to bump
 * @val: new head index
 **/
static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
{
	rx_ring->next_to_use = val;

	/* update next to alloc since we have filled the ring */
	rx_ring->next_to_alloc = val;

	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.  (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64).
	 */
	wmb();
	writel(val, rx_ring->tail);
}

/**
 * i40e_alloc_mapped_page - recycle or make a new page
 * @rx_ring: ring to use
 * @bi: rx_buffer struct to modify
 *
 * Returns true if the page was successfully allocated or
 * reused.
 **/
static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
				   struct i40e_rx_buffer *bi)
{
	struct page *page = bi->page;
	dma_addr_t dma;

	/* since we are recycling buffers we should seldom need to alloc */
	if (likely(page)) {
		rx_ring->rx_stats.page_reuse_count++;
		return true;
	}

	/* alloc new page for storage */
	page = dev_alloc_page();
	if (unlikely(!page)) {
		rx_ring->rx_stats.alloc_page_failed++;
		return false;
	}

	/* map page for use */
	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);

	/* if mapping failed free memory back to system since
	 * there isn't much point in holding memory we can't use
	 */
	if (dma_mapping_error(rx_ring->dev, dma)) {
		__free_pages(page, 0);
		rx_ring->rx_stats.alloc_page_failed++;
		return false;
	}

	bi->dma = dma;
	bi->page = page;
	bi->page_offset = 0;

	return true;
}

/**
 * i40e_receive_skb - Send a completed packet up the stack
 * @rx_ring: rx ring in play
 * @skb: packet to send up
 * @vlan_tag: vlan tag for packet
 **/
static void i40e_receive_skb(struct i40e_ring *rx_ring,
			     struct sk_buff *skb, u16 vlan_tag)
{
	struct i40e_q_vector *q_vector = rx_ring->q_vector;

	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
	    (vlan_tag & VLAN_VID_MASK))
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);

	napi_gro_receive(&q_vector->napi, skb);
}

/**
 * i40e_alloc_rx_buffers - Replace used receive buffers
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 *
 * Returns false if all allocations were successful, true if any fail
 **/
bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
{
	u16 ntu = rx_ring->next_to_use;
	union i40e_rx_desc *rx_desc;
	struct i40e_rx_buffer *bi;

	/* do nothing if no valid netdev defined */
	if (!rx_ring->netdev || !cleaned_count)
		return false;

	rx_desc = I40E_RX_DESC(rx_ring, ntu);
	bi = &rx_ring->rx_bi[ntu];

	do {
		if (!i40e_alloc_mapped_page(rx_ring, bi))
			goto no_buffers;

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
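		 * (The descriptor is filled in here using its "read" layout;
		 * the hardware's write-back overlays it with the "wb" layout.)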
		 */
		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);

		rx_desc++;
		bi++;
		ntu++;
		if (unlikely(ntu == rx_ring->count)) {
			rx_desc = I40E_RX_DESC(rx_ring, 0);
			bi = rx_ring->rx_bi;
			ntu = 0;
		}

		/* clear the status bits for the next_to_use descriptor */
		rx_desc->wb.qword1.status_error_len = 0;

		cleaned_count--;
	} while (cleaned_count);

	if (rx_ring->next_to_use != ntu)
		i40e_release_rx_desc(rx_ring, ntu);

	return false;

no_buffers:
	if (rx_ring->next_to_use != ntu)
		i40e_release_rx_desc(rx_ring, ntu);

	/* make sure to come back via polling to try again after
	 * allocation failure
	 */
	return true;
}

/**
 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
 * @vsi: the VSI we care about
 * @skb: skb currently being received and modified
 * @rx_desc: the receive descriptor
 *
 * skb->protocol must be set before this function is called
 **/
static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
				    struct sk_buff *skb,
				    union i40e_rx_desc *rx_desc)
{
	struct i40e_rx_ptype_decoded decoded;
	u32 rx_error, rx_status;
	bool ipv4, ipv6;
	u8 ptype;
	u64 qword;

	qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT;
	rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
		   I40E_RXD_QW1_ERROR_SHIFT;
	rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
		    I40E_RXD_QW1_STATUS_SHIFT;
	decoded = decode_rx_desc_ptype(ptype);

	skb->ip_summed = CHECKSUM_NONE;

	skb_checksum_none_assert(skb);

	/* Rx csum enabled and ip headers found? */
	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
		return;

	/* did the hardware decode the packet and checksum? */
	if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
		return;

	/* both known and outer_ip must be set for the below code to work */
	if (!(decoded.known && decoded.outer_ip))
		return;

	ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
	       (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4);
	ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
	       (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6);

	if (ipv4 &&
	    (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
			 BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
		goto checksum_fail;

	/* likely incorrect csum if alternate IP extension headers found */
	if (ipv6 &&
	    rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
		/* don't increment checksum err here, non-fatal err */
		return;

	/* there was some L4 error, count error and punt packet to the stack */
	if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
		goto checksum_fail;

	/* handle packets that were not able to be checksummed due
	 * to arrival speed, in this case the stack can compute
	 * the csum.
	 */
	if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
		return;

	/* If there is an outer header present that might contain a checksum
	 * we need to bump the checksum level by 1 to reflect the fact that
	 * we are indicating we validated the inner checksum.
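	 * A csum_level of 1 tells the stack that CHECKSUM_UNNECESSARY also
	 * covers one level of encapsulation, i.e. the inner headers of a
	 * tunneled frame.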
	 */
	if (decoded.tunnel_type >= I40E_RX_PTYPE_TUNNEL_IP_GRENAT)
		skb->csum_level = 1;

	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
	switch (decoded.inner_prot) {
	case I40E_RX_PTYPE_INNER_PROT_TCP:
	case I40E_RX_PTYPE_INNER_PROT_UDP:
	case I40E_RX_PTYPE_INNER_PROT_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		/* fall through */
	default:
		break;
	}

	return;

checksum_fail:
	vsi->back->hw_csum_rx_error++;
}

/**
 * i40e_ptype_to_htype - get a hash type
 * @ptype: the ptype value from the descriptor
 *
 * Returns a hash type to be used by skb_set_hash
 **/
static inline int i40e_ptype_to_htype(u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);

	if (!decoded.known)
		return PKT_HASH_TYPE_NONE;

	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
		return PKT_HASH_TYPE_L4;
	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
		return PKT_HASH_TYPE_L3;
	else
		return PKT_HASH_TYPE_L2;
}

/**
 * i40e_rx_hash - set the hash value in the skb
 * @ring: descriptor ring
 * @rx_desc: specific descriptor
 * @skb: skb currently being received and modified
 * @rx_ptype: the packet type decoded by hardware
 **/
static inline void i40e_rx_hash(struct i40e_ring *ring,
				union i40e_rx_desc *rx_desc,
				struct sk_buff *skb,
				u8 rx_ptype)
{
	u32 hash;
	const __le64 rss_mask =
		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);

	if (!(ring->netdev->features & NETIF_F_RXHASH))
		return;

	if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
		hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
		skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype));
	}
}

/**
 * i40e_process_skb_fields - Populate skb header fields from Rx descriptor
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @rx_desc: pointer to the EOP Rx descriptor
 * @skb: pointer to current skb being populated
 * @rx_ptype: the packet type decoded by hardware
 *
 * This function checks the ring, descriptor, and packet information in
 * order to populate the hash, checksum, VLAN, protocol, and
 * other fields within the skb.
 **/
static inline
void i40e_process_skb_fields(struct i40e_ring *rx_ring,
			     union i40e_rx_desc *rx_desc, struct sk_buff *skb,
			     u8 rx_ptype)
{
	u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	u32 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
			I40E_RXD_QW1_STATUS_SHIFT;
	u32 tsynvalid = rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK;
	u32 tsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
		   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT;

	if (unlikely(tsynvalid))
		i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, tsyn);

	i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);

	/* modifies the skb - consumes the enet header */
	skb->protocol = eth_type_trans(skb, rx_ring->netdev);

	i40e_rx_checksum(rx_ring->vsi, skb, rx_desc);

	skb_record_rx_queue(skb, rx_ring->queue_index);
}

/**
 * i40e_cleanup_headers - Correct empty headers
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @skb: pointer to current skb being fixed
 *
 * Also address the case where we are pulling data in on pages only
 * and as such no data is present in the skb header.
 *
 * In addition if skb is not at least 60 bytes we need to pad it so that
 * it is large enough to qualify as a valid Ethernet frame.
 *
 * Returns true if an error was encountered and skb was freed.
 **/
static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb)
{
	/* if eth_skb_pad returns an error the skb was freed */
	if (eth_skb_pad(skb))
		return true;

	return false;
}

/**
 * i40e_reuse_rx_page - page flip buffer and store it back on the ring
 * @rx_ring: rx descriptor ring to store buffers on
 * @old_buff: donor buffer to have page reused
 *
 * Synchronizes page for reuse by the adapter
 **/
static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
			       struct i40e_rx_buffer *old_buff)
{
	struct i40e_rx_buffer *new_buff;
	u16 nta = rx_ring->next_to_alloc;

	new_buff = &rx_ring->rx_bi[nta];

	/* update, and store next to alloc */
	nta++;
	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;

	/* transfer page from old buffer to new buffer */
	*new_buff = *old_buff;
}

/**
 * i40e_page_is_reusable - check if any reuse is possible
 * @page: page struct to check
 *
 * A page is not reusable if it was allocated under low memory
 * conditions, or it's not in the same NUMA node as this CPU.
 */
static inline bool i40e_page_is_reusable(struct page *page)
{
	return (page_to_nid(page) == numa_mem_id()) &&
		!page_is_pfmemalloc(page);
}

/**
 * i40e_can_reuse_rx_page - Determine if this page can be reused by
 * the adapter for another receive
 *
 * @rx_buffer: buffer containing the page
 * @page: page address from rx_buffer
 * @truesize: actual size of the buffer in this page
 *
 * If page is reusable, rx_buffer->page_offset is adjusted to point to
 * an unused region in the page.
 *
 * For small pages, @truesize will be a constant value, half the size
 * of the memory at page.  We'll attempt to alternate between high and
 * low halves of the page, with one half ready for use by the hardware
 * and the other half being consumed by the stack.
 * We use the page ref count to determine whether the stack has finished
 * consuming the portion of this page that was passed up with a previous
 * packet.  If the page ref count is >1, we'll assume the "other" half page
 * is still busy, and this page cannot be reused.
 *
 * For larger pages, @truesize will be the actual space used by the
 * received packet (adjusted upward to an even multiple of the cache
 * line size).  This will advance through the page by the amount
 * actually consumed by the received packets while there is still
 * space for a buffer.  Each region of larger pages will be used at
 * most once, after which the page will not be reused.
 *
 * In either case, if the page is reusable its refcount is increased.
 **/
static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
				   struct page *page,
				   const unsigned int truesize)
{
#if (PAGE_SIZE >= 8192)
	unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
#endif

	/* Is any reuse possible? */
	if (unlikely(!i40e_page_is_reusable(page)))
		return false;

#if (PAGE_SIZE < 8192)
	/* if we are only owner of page we can reuse it */
	if (unlikely(page_count(page) != 1))
		return false;

	/* flip page offset to other buffer */
	rx_buffer->page_offset ^= truesize;
#else
	/* move offset up to the next cache line */
	rx_buffer->page_offset += truesize;

	if (rx_buffer->page_offset > last_offset)
		return false;
#endif

	/* Inc ref count on page before passing it up to the stack */
	get_page(page);

	return true;
}

/**
 * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff
 * @rx_ring: rx descriptor ring to transact packets on
 * @rx_buffer: buffer containing page to add
 * @size: packet length from rx_desc
 * @skb: sk_buff to place the data into
 *
 * This function will add the data contained in rx_buffer->page to the skb.
 * This is done either through a direct copy if the data in the buffer is
 * less than the skb header size, otherwise it will just attach the page as
 * a frag to the skb.
 *
 * The function will then update the page offset if necessary and return
 * true if the buffer can be reused by the adapter.
 **/
static bool i40e_add_rx_frag(struct i40e_ring *rx_ring,
			     struct i40e_rx_buffer *rx_buffer,
			     unsigned int size,
			     struct sk_buff *skb)
{
	struct page *page = rx_buffer->page;
	unsigned char *va = page_address(page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
	unsigned int truesize = I40E_RXBUFFER_2048;
#else
	unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
#endif
	unsigned int pull_len;

	if (unlikely(skb_is_nonlinear(skb)))
		goto add_tail_frag;

	/* will the data fit in the skb we allocated? if so, just
	 * copy it as it is pretty small anyway
	 */
	if (size <= I40E_RX_HDR_SIZE) {
		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));

		/* page is reusable, we can reuse buffer as-is */
		if (likely(i40e_page_is_reusable(page)))
			return true;

		/* this page cannot be reused so discard it */
		__free_pages(page, 0);
		return false;
	}

	/* we need the header to contain the greater of either
	 * ETH_HLEN or 60 bytes if the skb->len is less than
	 * 60 for skb_pad.
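	 * eth_get_headlen() parses just far enough into the buffer to find
	 * where the protocol headers end, so only the header portion is
	 * copied into the linear part of the skb.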
	 */
	pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE);

	/* align pull length to size of long to optimize
	 * memcpy performance
	 */
	memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));

	/* update all of the pointers */
	va += pull_len;
	size -= pull_len;

add_tail_frag:
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
			(unsigned long)va & ~PAGE_MASK, size, truesize);

	return i40e_can_reuse_rx_page(rx_buffer, page, truesize);
}

/**
 * i40e_fetch_rx_buffer - Allocate skb and populate it
 * @rx_ring: rx descriptor ring to transact packets on
 * @rx_desc: descriptor containing info written by hardware
 * @skb: skb being built, or NULL to allocate a new one
 *
 * This function allocates an skb on the fly, and populates it with the page
 * data from the current receive descriptor, taking care to set up the skb
 * correctly, as well as handling calling the page recycle function if
 * necessary.
 */
static inline
struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring *rx_ring,
				     union i40e_rx_desc *rx_desc,
				     struct sk_buff *skb)
{
	u64 local_status_error_len =
		le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	unsigned int size =
		(local_status_error_len & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
		I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
	struct i40e_rx_buffer *rx_buffer;
	struct page *page;

	rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
	page = rx_buffer->page;
	prefetchw(page);

	if (likely(!skb)) {
		void *page_addr = page_address(page) + rx_buffer->page_offset;

		/* prefetch first cache line of first page */
		prefetch(page_addr);
#if L1_CACHE_BYTES < 128
		prefetch(page_addr + L1_CACHE_BYTES);
#endif

		/* allocate a skb to store the frags */
		skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
				       I40E_RX_HDR_SIZE,
				       GFP_ATOMIC | __GFP_NOWARN);
		if (unlikely(!skb)) {
			rx_ring->rx_stats.alloc_buff_failed++;
			return NULL;
		}

		/* we will be copying header into skb->data in
		 * pskb_may_pull so it is in our interest to prefetch
		 * it now to avoid a possible cache miss
		 */
		prefetchw(skb->data);
	}

	/* we are reusing so sync this buffer for CPU use */
	dma_sync_single_range_for_cpu(rx_ring->dev,
				      rx_buffer->dma,
				      rx_buffer->page_offset,
				      size,
				      DMA_FROM_DEVICE);

	/* pull page into skb */
	if (i40e_add_rx_frag(rx_ring, rx_buffer, size, skb)) {
		/* hand second half of page back to the ring */
		i40e_reuse_rx_page(rx_ring, rx_buffer);
		rx_ring->rx_stats.page_reuse_count++;
	} else {
		/* we are not reusing the buffer so unmap it */
		dma_unmap_page(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
			       DMA_FROM_DEVICE);
	}

	/* clear contents of buffer_info */
	rx_buffer->page = NULL;

	return skb;
}

/**
 * i40e_is_non_eop - process handling of non-EOP buffers
 * @rx_ring: Rx ring being processed
 * @rx_desc: Rx descriptor for current buffer
 * @skb: Current socket buffer containing buffer in progress
 *
 * This function updates next to clean.  If the buffer is an EOP buffer
 * this function exits returning false, otherwise it will place the
 * sk_buff in the next buffer to be chained and return true indicating
 * that this is in fact a non-EOP buffer.
 **/
static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
			    union i40e_rx_desc *rx_desc,
			    struct sk_buff *skb)
{
	u32 ntc = rx_ring->next_to_clean + 1;

	/* fetch, update, and store next to clean */
	ntc = (ntc < rx_ring->count) ? ntc : 0;
	rx_ring->next_to_clean = ntc;

	prefetch(I40E_RX_DESC(rx_ring, ntc));

#define staterrlen rx_desc->wb.qword1.status_error_len
	if (unlikely(i40e_rx_is_programming_status(le64_to_cpu(staterrlen)))) {
		i40e_clean_programming_status(rx_ring, rx_desc);
		return true;
	}
	/* if we are the last buffer then there is nothing else to do */
#define I40E_RXD_EOF BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)
	if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF)))
		return false;

	rx_ring->rx_stats.non_eop_descs++;

	return true;
}

/**
 * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
 * @rx_ring: rx descriptor ring to transact packets on
 * @budget: Total limit on number of packets to process
 *
 * This function provides a "bounce buffer" approach to Rx interrupt
 * processing.  The advantage to this is that on systems that have
 * expensive overhead for IOMMU access this provides a means of avoiding
 * it by maintaining the mapping of the page to the system.
 *
 * Returns amount of work completed
 **/
static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	struct sk_buff *skb = rx_ring->skb;
	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
	bool failure = false;

	while (likely(total_rx_packets < budget)) {
		union i40e_rx_desc *rx_desc;
		u16 vlan_tag;
		u8 rx_ptype;
		u64 qword;

		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
			failure = failure ||
				  i40e_alloc_rx_buffers(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);

		/* status_error_len will always be zero for unused descriptors
		 * because it's cleared in cleanup, and overlaps with hdr_addr
		 * which is always zero because packet split isn't used, if the
		 * hardware wrote DD then it will be non-zero
		 */
		if (!i40e_test_staterr(rx_desc,
				       BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * DD bit is set.
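		 * dma_rmb() is a lighter barrier than rmb(); it only orders
		 * our reads of this coherent DMA memory against the device's
		 * descriptor write-back, which is all that is needed here.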
		 */
		dma_rmb();

		skb = i40e_fetch_rx_buffer(rx_ring, rx_desc, skb);
		if (!skb)
			break;

		cleaned_count++;

		if (i40e_is_non_eop(rx_ring, rx_desc, skb))
			continue;

		/* ERR_MASK will only have valid bits if EOP set, and
		 * what we are doing here is actually checking
		 * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
		 * the error field
		 */
		if (unlikely(i40e_test_staterr(rx_desc,
					       BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
			dev_kfree_skb_any(skb);
			continue;
		}

		if (i40e_cleanup_headers(rx_ring, skb)) {
			skb = NULL;
			continue;
		}

		/* probably a little skewed due to removing CRC */
		total_rx_bytes += skb->len;

		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
			   I40E_RXD_QW1_PTYPE_SHIFT;

		/* populate checksum, VLAN, and protocol */
		i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);

#ifdef I40E_FCOE
		if (unlikely(
		    i40e_rx_is_fcoe(rx_ptype) &&
		    !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) {
			dev_kfree_skb_any(skb);
			continue;
		}
#endif

		vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ?
			   le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0;

		i40e_receive_skb(rx_ring, skb, vlan_tag);
		skb = NULL;

		/* update budget accounting */
		total_rx_packets++;
	}

	rx_ring->skb = skb;

	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_rx_packets;
	rx_ring->stats.bytes += total_rx_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	rx_ring->q_vector->rx.total_packets += total_rx_packets;
	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;

	/* guarantee a trip back through this routine if there was a failure */
	return failure ?
budget : total_rx_packets;
1849 }
1850
1851 static u32 i40e_buildreg_itr(const int type, const u16 itr)
1852 {
1853 u32 val;
1854
1855 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1856 /* Don't clear PBA because that can cause lost interrupts that
1857 * came in while we were cleaning/polling
1858 */
1859 (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1860 (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
1861
1862 return val;
1863 }
1864
1865 /* a small macro to shorten up some long lines */
1866 #define INTREG I40E_PFINT_DYN_CTLN
1867 static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
1868 {
1869 return vsi->rx_rings[idx]->rx_itr_setting;
1870 }
1871
1872 static inline int get_tx_itr(struct i40e_vsi *vsi, int idx)
1873 {
1874 return vsi->tx_rings[idx]->tx_itr_setting;
1875 }
1876
1877 /**
1878 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
1879 * @vsi: the VSI we care about
1880 * @q_vector: q_vector for which itr is being updated and interrupt enabled
1881 *
1882 **/
1883 static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
1884 struct i40e_q_vector *q_vector)
1885 {
1886 struct i40e_hw *hw = &vsi->back->hw;
1887 bool rx = false, tx = false;
1888 u32 rxval, txval;
1889 int vector;
1890 int idx = q_vector->v_idx;
1891 int rx_itr_setting, tx_itr_setting;
1892
1893 vector = (q_vector->v_idx + vsi->base_vector);
1894
1895 /* avoid dynamic calculation if in countdown mode OR if
1896 * all dynamic is disabled
1897 */
1898 rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
1899
1900 rx_itr_setting = get_rx_itr(vsi, idx);
1901 tx_itr_setting = get_tx_itr(vsi, idx);
1902
1903 if (q_vector->itr_countdown > 0 ||
1904 (!ITR_IS_DYNAMIC(rx_itr_setting) &&
1905 !ITR_IS_DYNAMIC(tx_itr_setting))) {
1906 goto enable_int;
1907 }
1908
1909 if (ITR_IS_DYNAMIC(rx_itr_setting)) {
1910 rx = i40e_set_new_dynamic_itr(&q_vector->rx);
1911 rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
1912 }
1913
1914 if (ITR_IS_DYNAMIC(tx_itr_setting)) {
1915 tx = i40e_set_new_dynamic_itr(&q_vector->tx);
1916 txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
1917 }
1918
1919 if (rx || tx) {
1920 /* get the higher of the two ITR adjustments and
1921 * use the same value for both ITR registers
1922 * when in adaptive mode (Rx and/or Tx)
1923 */
1924 u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
1925
1926 q_vector->tx.itr = q_vector->rx.itr = itr;
1927 txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
1928 tx = true;
1929 rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
1930 rx = true;
1931 }
1932
1933 /* only need to enable the interrupt once, but need
1934 * to possibly update both ITR values
1935 */
1936 if (rx) {
1937 /* set the INTENA_MSK_MASK so that this first write
1938 * won't actually enable the interrupt, instead just
1939 * updating the ITR (INTENA_MSK is bit 31 in both the PF and VF registers)
1940 */
1941 rxval |= BIT(31);
1942 /* don't check _DOWN because interrupt isn't being enabled */
1943 wr32(hw, INTREG(vector - 1), rxval);
1944 }
1945
1946 enable_int:
1947 if (!test_bit(__I40E_DOWN, &vsi->state))
1948 wr32(hw, INTREG(vector - 1), txval);
1949
1950 if (q_vector->itr_countdown)
1951 q_vector->itr_countdown--;
1952 else
1953 q_vector->itr_countdown = ITR_COUNTDOWN_START;
1954 }
1955
1956 /**
1957 * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1958 * @napi: napi struct with our device's info in it
1959 * @budget: amount of work driver is allowed to do this pass, in packets
1960 *
1961 * This function will clean all queues associated with a q_vector.
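* The Rx share of @budget is split evenly across the Rx rings attached to
* this vector, but never drops below one packet per ring; for example, a
* NAPI budget of 64 across four ring pairs allows each Rx ring to clean at
* most 16 packets per poll.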
1962 * 1963 * Returns the amount of work done 1964 **/ 1965 int i40e_napi_poll(struct napi_struct *napi, int budget) 1966 { 1967 struct i40e_q_vector *q_vector = 1968 container_of(napi, struct i40e_q_vector, napi); 1969 struct i40e_vsi *vsi = q_vector->vsi; 1970 struct i40e_ring *ring; 1971 bool clean_complete = true; 1972 bool arm_wb = false; 1973 int budget_per_ring; 1974 int work_done = 0; 1975 1976 if (test_bit(__I40E_DOWN, &vsi->state)) { 1977 napi_complete(napi); 1978 return 0; 1979 } 1980 1981 /* Clear hung_detected bit */ 1982 clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected); 1983 /* Since the actual Tx work is minimal, we can give the Tx a larger 1984 * budget and be more aggressive about cleaning up the Tx descriptors. 1985 */ 1986 i40e_for_each_ring(ring, q_vector->tx) { 1987 if (!i40e_clean_tx_irq(vsi, ring, budget)) { 1988 clean_complete = false; 1989 continue; 1990 } 1991 arm_wb |= ring->arm_wb; 1992 ring->arm_wb = false; 1993 } 1994 1995 /* Handle case where we are called by netpoll with a budget of 0 */ 1996 if (budget <= 0) 1997 goto tx_only; 1998 1999 /* We attempt to distribute budget to each Rx queue fairly, but don't 2000 * allow the budget to go below 1 because that would exit polling early. 2001 */ 2002 budget_per_ring = max(budget/q_vector->num_ringpairs, 1); 2003 2004 i40e_for_each_ring(ring, q_vector->rx) { 2005 int cleaned = i40e_clean_rx_irq(ring, budget_per_ring); 2006 2007 work_done += cleaned; 2008 /* if we clean as many as budgeted, we must not be done */ 2009 if (cleaned >= budget_per_ring) 2010 clean_complete = false; 2011 } 2012 2013 /* If work not completed, return budget and polling will return */ 2014 if (!clean_complete) { 2015 const cpumask_t *aff_mask = &q_vector->affinity_mask; 2016 int cpu_id = smp_processor_id(); 2017 2018 /* It is possible that the interrupt affinity has changed but, 2019 * if the cpu is pegged at 100%, polling will never exit while 2020 * traffic continues and the interrupt will be stuck on this 2021 * cpu. We check to make sure affinity is correct before we 2022 * continue to poll, otherwise we must stop polling so the 2023 * interrupt can move to the correct cpu. 2024 */ 2025 if (likely(cpumask_test_cpu(cpu_id, aff_mask) || 2026 !(vsi->back->flags & I40E_FLAG_MSIX_ENABLED))) { 2027 tx_only: 2028 if (arm_wb) { 2029 q_vector->tx.ring[0].tx_stats.tx_force_wb++; 2030 i40e_enable_wb_on_itr(vsi, q_vector); 2031 } 2032 return budget; 2033 } 2034 } 2035 2036 if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR) 2037 q_vector->arm_wb_state = false; 2038 2039 /* Work is done so exit the polling mode and re-enable the interrupt */ 2040 napi_complete_done(napi, work_done); 2041 2042 /* If we're prematurely stopping polling to fix the interrupt 2043 * affinity we want to make sure polling starts back up so we 2044 * issue a call to i40e_force_wb which triggers a SW interrupt. 
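* The SW interrupt fires on whichever CPU the IRQ is now affined to and
* reschedules NAPI there, so the rings are not left unpolled while we wait
* for more traffic to arrive.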
2045 */ 2046 if (!clean_complete) 2047 i40e_force_wb(vsi, q_vector); 2048 else if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) 2049 i40e_irq_dynamic_enable_icr0(vsi->back, false); 2050 else 2051 i40e_update_enable_itr(vsi, q_vector); 2052 2053 return min(work_done, budget - 1); 2054 } 2055 2056 /** 2057 * i40e_atr - Add a Flow Director ATR filter 2058 * @tx_ring: ring to add programming descriptor to 2059 * @skb: send buffer 2060 * @tx_flags: send tx flags 2061 **/ 2062 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, 2063 u32 tx_flags) 2064 { 2065 struct i40e_filter_program_desc *fdir_desc; 2066 struct i40e_pf *pf = tx_ring->vsi->back; 2067 union { 2068 unsigned char *network; 2069 struct iphdr *ipv4; 2070 struct ipv6hdr *ipv6; 2071 } hdr; 2072 struct tcphdr *th; 2073 unsigned int hlen; 2074 u32 flex_ptype, dtype_cmd; 2075 int l4_proto; 2076 u16 i; 2077 2078 /* make sure ATR is enabled */ 2079 if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED)) 2080 return; 2081 2082 if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) 2083 return; 2084 2085 /* if sampling is disabled do nothing */ 2086 if (!tx_ring->atr_sample_rate) 2087 return; 2088 2089 /* Currently only IPv4/IPv6 with TCP is supported */ 2090 if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6))) 2091 return; 2092 2093 /* snag network header to get L4 type and address */ 2094 hdr.network = (tx_flags & I40E_TX_FLAGS_UDP_TUNNEL) ? 2095 skb_inner_network_header(skb) : skb_network_header(skb); 2096 2097 /* Note: tx_flags gets modified to reflect inner protocols in 2098 * tx_enable_csum function if encap is enabled. 2099 */ 2100 if (tx_flags & I40E_TX_FLAGS_IPV4) { 2101 /* access ihl as u8 to avoid unaligned access on ia64 */ 2102 hlen = (hdr.network[0] & 0x0F) << 2; 2103 l4_proto = hdr.ipv4->protocol; 2104 } else { 2105 hlen = hdr.network - skb->data; 2106 l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL); 2107 hlen -= hdr.network - skb->data; 2108 } 2109 2110 if (l4_proto != IPPROTO_TCP) 2111 return; 2112 2113 th = (struct tcphdr *)(hdr.network + hlen); 2114 2115 /* Due to lack of space, no more new filters can be programmed */ 2116 if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) 2117 return; 2118 if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) && 2119 (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) { 2120 /* HW ATR eviction will take care of removing filters on FIN 2121 * and RST packets. 2122 */ 2123 if (th->fin || th->rst) 2124 return; 2125 } 2126 2127 tx_ring->atr_count++; 2128 2129 /* sample on all syn/fin/rst packets or once every atr sample rate */ 2130 if (!th->fin && 2131 !th->syn && 2132 !th->rst && 2133 (tx_ring->atr_count < tx_ring->atr_sample_rate)) 2134 return; 2135 2136 tx_ring->atr_count = 0; 2137 2138 /* grab the next descriptor */ 2139 i = tx_ring->next_to_use; 2140 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); 2141 2142 i++; 2143 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 2144 2145 flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & 2146 I40E_TXD_FLTR_QW0_QINDEX_MASK; 2147 flex_ptype |= (tx_flags & I40E_TX_FLAGS_IPV4) ? 2148 (I40E_FILTER_PCTYPE_NONF_IPV4_TCP << 2149 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) : 2150 (I40E_FILTER_PCTYPE_NONF_IPV6_TCP << 2151 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT); 2152 2153 flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT; 2154 2155 dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG; 2156 2157 dtype_cmd |= (th->fin || th->rst) ? 
2158 (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
2159 I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
2160 (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
2161 I40E_TXD_FLTR_QW1_PCMD_SHIFT);
2162
2163 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
2164 I40E_TXD_FLTR_QW1_DEST_SHIFT;
2165
2166 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
2167 I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
2168
2169 dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
2170 if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL))
2171 dtype_cmd |=
2172 ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
2173 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2174 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2175 else
2176 dtype_cmd |=
2177 ((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
2178 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2179 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2180
2181 if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
2182 (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)))
2183 dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
2184
2185 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
2186 fdir_desc->rsvd = cpu_to_le32(0);
2187 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
2188 fdir_desc->fd_id = cpu_to_le32(0);
2189 }
2190
2191 /**
2192 * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
2193 * @skb: send buffer
2194 * @tx_ring: ring to send buffer on
2195 * @flags: the tx flags to be set
2196 *
2197 * Checks the skb and sets up the corresponding generic transmit flags
2198 * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
2199 *
2200 * Returns an error code to indicate the frame should be dropped upon error,
2201 * otherwise returns 0 to indicate the flags have been set properly.
2202 **/
2203 #ifdef I40E_FCOE
2204 inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2205 struct i40e_ring *tx_ring,
2206 u32 *flags)
2207 #else
2208 static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2209 struct i40e_ring *tx_ring,
2210 u32 *flags)
2211 #endif
2212 {
2213 __be16 protocol = skb->protocol;
2214 u32 tx_flags = 0;
2215
2216 if (protocol == htons(ETH_P_8021Q) &&
2217 !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
2218 /* When HW VLAN acceleration is turned off by the user the
2219 * stack sets the protocol to 8021q so that the driver
2220 * can take any steps required to support the SW only
2221 * VLAN handling. In our case the driver doesn't need
2222 * to take any further steps so just set the protocol
2223 * to the encapsulated ethertype.
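* For example, an 802.1Q tagged IPv4 frame arrives here with skb->protocol
* set to ETH_P_8021Q; vlan_get_protocol() below returns the encapsulated
* ETH_P_IP, so the offload paths later on treat it as a plain IPv4 frame.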
2224 */ 2225 skb->protocol = vlan_get_protocol(skb); 2226 goto out; 2227 } 2228 2229 /* if we have a HW VLAN tag being added, default to the HW one */ 2230 if (skb_vlan_tag_present(skb)) { 2231 tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; 2232 tx_flags |= I40E_TX_FLAGS_HW_VLAN; 2233 /* else if it is a SW VLAN, check the next protocol and store the tag */ 2234 } else if (protocol == htons(ETH_P_8021Q)) { 2235 struct vlan_hdr *vhdr, _vhdr; 2236 2237 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr); 2238 if (!vhdr) 2239 return -EINVAL; 2240 2241 protocol = vhdr->h_vlan_encapsulated_proto; 2242 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT; 2243 tx_flags |= I40E_TX_FLAGS_SW_VLAN; 2244 } 2245 2246 if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED)) 2247 goto out; 2248 2249 /* Insert 802.1p priority into VLAN header */ 2250 if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) || 2251 (skb->priority != TC_PRIO_CONTROL)) { 2252 tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK; 2253 tx_flags |= (skb->priority & 0x7) << 2254 I40E_TX_FLAGS_VLAN_PRIO_SHIFT; 2255 if (tx_flags & I40E_TX_FLAGS_SW_VLAN) { 2256 struct vlan_ethhdr *vhdr; 2257 int rc; 2258 2259 rc = skb_cow_head(skb, 0); 2260 if (rc < 0) 2261 return rc; 2262 vhdr = (struct vlan_ethhdr *)skb->data; 2263 vhdr->h_vlan_TCI = htons(tx_flags >> 2264 I40E_TX_FLAGS_VLAN_SHIFT); 2265 } else { 2266 tx_flags |= I40E_TX_FLAGS_HW_VLAN; 2267 } 2268 } 2269 2270 out: 2271 *flags = tx_flags; 2272 return 0; 2273 } 2274 2275 /** 2276 * i40e_tso - set up the tso context descriptor 2277 * @first: pointer to first Tx buffer for xmit 2278 * @hdr_len: ptr to the size of the packet header 2279 * @cd_type_cmd_tso_mss: Quad Word 1 2280 * 2281 * Returns 0 if no TSO can happen, 1 if tso is going, or error 2282 **/ 2283 static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len, 2284 u64 *cd_type_cmd_tso_mss) 2285 { 2286 struct sk_buff *skb = first->skb; 2287 u64 cd_cmd, cd_tso_len, cd_mss; 2288 union { 2289 struct iphdr *v4; 2290 struct ipv6hdr *v6; 2291 unsigned char *hdr; 2292 } ip; 2293 union { 2294 struct tcphdr *tcp; 2295 struct udphdr *udp; 2296 unsigned char *hdr; 2297 } l4; 2298 u32 paylen, l4_offset; 2299 u16 gso_segs, gso_size; 2300 int err; 2301 2302 if (skb->ip_summed != CHECKSUM_PARTIAL) 2303 return 0; 2304 2305 if (!skb_is_gso(skb)) 2306 return 0; 2307 2308 err = skb_cow_head(skb, 0); 2309 if (err < 0) 2310 return err; 2311 2312 ip.hdr = skb_network_header(skb); 2313 l4.hdr = skb_transport_header(skb); 2314 2315 /* initialize outer IP header fields */ 2316 if (ip.v4->version == 4) { 2317 ip.v4->tot_len = 0; 2318 ip.v4->check = 0; 2319 } else { 2320 ip.v6->payload_len = 0; 2321 } 2322 2323 if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | 2324 SKB_GSO_GRE_CSUM | 2325 SKB_GSO_IPXIP4 | 2326 SKB_GSO_IPXIP6 | 2327 SKB_GSO_UDP_TUNNEL | 2328 SKB_GSO_UDP_TUNNEL_CSUM)) { 2329 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && 2330 (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) { 2331 l4.udp->len = 0; 2332 2333 /* determine offset of outer transport header */ 2334 l4_offset = l4.hdr - skb->data; 2335 2336 /* remove payload length from outer checksum */ 2337 paylen = skb->len - l4_offset; 2338 csum_replace_by_diff(&l4.udp->check, 2339 (__force __wsum)htonl(paylen)); 2340 } 2341 2342 /* reset pointers to inner headers */ 2343 ip.hdr = skb_inner_network_header(skb); 2344 l4.hdr = skb_inner_transport_header(skb); 2345 2346 /* initialize inner IP header fields */ 2347 if (ip.v4->version == 4) { 2348 
ip.v4->tot_len = 0; 2349 ip.v4->check = 0; 2350 } else { 2351 ip.v6->payload_len = 0; 2352 } 2353 } 2354 2355 /* determine offset of inner transport header */ 2356 l4_offset = l4.hdr - skb->data; 2357 2358 /* remove payload length from inner checksum */ 2359 paylen = skb->len - l4_offset; 2360 csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); 2361 2362 /* compute length of segmentation header */ 2363 *hdr_len = (l4.tcp->doff * 4) + l4_offset; 2364 2365 /* pull values out of skb_shinfo */ 2366 gso_size = skb_shinfo(skb)->gso_size; 2367 gso_segs = skb_shinfo(skb)->gso_segs; 2368 2369 /* update GSO size and bytecount with header size */ 2370 first->gso_segs = gso_segs; 2371 first->bytecount += (first->gso_segs - 1) * *hdr_len; 2372 2373 /* find the field values */ 2374 cd_cmd = I40E_TX_CTX_DESC_TSO; 2375 cd_tso_len = skb->len - *hdr_len; 2376 cd_mss = gso_size; 2377 *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | 2378 (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | 2379 (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); 2380 return 1; 2381 } 2382 2383 /** 2384 * i40e_tsyn - set up the tsyn context descriptor 2385 * @tx_ring: ptr to the ring to send 2386 * @skb: ptr to the skb we're sending 2387 * @tx_flags: the collected send information 2388 * @cd_type_cmd_tso_mss: Quad Word 1 2389 * 2390 * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen 2391 **/ 2392 static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb, 2393 u32 tx_flags, u64 *cd_type_cmd_tso_mss) 2394 { 2395 struct i40e_pf *pf; 2396 2397 if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) 2398 return 0; 2399 2400 /* Tx timestamps cannot be sampled when doing TSO */ 2401 if (tx_flags & I40E_TX_FLAGS_TSO) 2402 return 0; 2403 2404 /* only timestamp the outbound packet if the user has requested it and 2405 * we are not already transmitting a packet to be timestamped 2406 */ 2407 pf = i40e_netdev_to_pf(tx_ring->netdev); 2408 if (!(pf->flags & I40E_FLAG_PTP)) 2409 return 0; 2410 2411 if (pf->ptp_tx && 2412 !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) { 2413 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 2414 pf->ptp_tx_skb = skb_get(skb); 2415 } else { 2416 return 0; 2417 } 2418 2419 *cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN << 2420 I40E_TXD_CTX_QW1_CMD_SHIFT; 2421 2422 return 1; 2423 } 2424 2425 /** 2426 * i40e_tx_enable_csum - Enable Tx checksum offloads 2427 * @skb: send buffer 2428 * @tx_flags: pointer to Tx flags currently set 2429 * @td_cmd: Tx descriptor command bits to set 2430 * @td_offset: Tx descriptor header offsets to set 2431 * @tx_ring: Tx descriptor ring 2432 * @cd_tunneling: ptr to context desc bits 2433 **/ 2434 static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, 2435 u32 *td_cmd, u32 *td_offset, 2436 struct i40e_ring *tx_ring, 2437 u32 *cd_tunneling) 2438 { 2439 union { 2440 struct iphdr *v4; 2441 struct ipv6hdr *v6; 2442 unsigned char *hdr; 2443 } ip; 2444 union { 2445 struct tcphdr *tcp; 2446 struct udphdr *udp; 2447 unsigned char *hdr; 2448 } l4; 2449 unsigned char *exthdr; 2450 u32 offset, cmd = 0; 2451 __be16 frag_off; 2452 u8 l4_proto = 0; 2453 2454 if (skb->ip_summed != CHECKSUM_PARTIAL) 2455 return 0; 2456 2457 ip.hdr = skb_network_header(skb); 2458 l4.hdr = skb_transport_header(skb); 2459 2460 /* compute outer L2 header size */ 2461 offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; 2462 2463 if (skb->encapsulation) { 2464 u32 tunnel = 0; 2465 /* define outer network header type 
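* (for IPv4 the TSO case selects I40E_TX_CTX_EXT_IP_IPV4 so the hardware
* can regenerate the outer IP checksum for each segment, while the non-TSO
* case uses I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM and leaves the stack-computed
* outer header untouched)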
*/ 2466 if (*tx_flags & I40E_TX_FLAGS_IPV4) { 2467 tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ? 2468 I40E_TX_CTX_EXT_IP_IPV4 : 2469 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; 2470 2471 l4_proto = ip.v4->protocol; 2472 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { 2473 tunnel |= I40E_TX_CTX_EXT_IP_IPV6; 2474 2475 exthdr = ip.hdr + sizeof(*ip.v6); 2476 l4_proto = ip.v6->nexthdr; 2477 if (l4.hdr != exthdr) 2478 ipv6_skip_exthdr(skb, exthdr - skb->data, 2479 &l4_proto, &frag_off); 2480 } 2481 2482 /* define outer transport */ 2483 switch (l4_proto) { 2484 case IPPROTO_UDP: 2485 tunnel |= I40E_TXD_CTX_UDP_TUNNELING; 2486 *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; 2487 break; 2488 case IPPROTO_GRE: 2489 tunnel |= I40E_TXD_CTX_GRE_TUNNELING; 2490 *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; 2491 break; 2492 case IPPROTO_IPIP: 2493 case IPPROTO_IPV6: 2494 *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; 2495 l4.hdr = skb_inner_network_header(skb); 2496 break; 2497 default: 2498 if (*tx_flags & I40E_TX_FLAGS_TSO) 2499 return -1; 2500 2501 skb_checksum_help(skb); 2502 return 0; 2503 } 2504 2505 /* compute outer L3 header size */ 2506 tunnel |= ((l4.hdr - ip.hdr) / 4) << 2507 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; 2508 2509 /* switch IP header pointer from outer to inner header */ 2510 ip.hdr = skb_inner_network_header(skb); 2511 2512 /* compute tunnel header size */ 2513 tunnel |= ((ip.hdr - l4.hdr) / 2) << 2514 I40E_TXD_CTX_QW0_NATLEN_SHIFT; 2515 2516 /* indicate if we need to offload outer UDP header */ 2517 if ((*tx_flags & I40E_TX_FLAGS_TSO) && 2518 !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && 2519 (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) 2520 tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK; 2521 2522 /* record tunnel offload values */ 2523 *cd_tunneling |= tunnel; 2524 2525 /* switch L4 header pointer from outer to inner */ 2526 l4.hdr = skb_inner_transport_header(skb); 2527 l4_proto = 0; 2528 2529 /* reset type as we transition from outer to inner headers */ 2530 *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6); 2531 if (ip.v4->version == 4) 2532 *tx_flags |= I40E_TX_FLAGS_IPV4; 2533 if (ip.v6->version == 6) 2534 *tx_flags |= I40E_TX_FLAGS_IPV6; 2535 } 2536 2537 /* Enable IP checksum offloads */ 2538 if (*tx_flags & I40E_TX_FLAGS_IPV4) { 2539 l4_proto = ip.v4->protocol; 2540 /* the stack computes the IP header already, the only time we 2541 * need the hardware to recompute it is in the case of TSO. 2542 */ 2543 cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ? 
2544 I40E_TX_DESC_CMD_IIPT_IPV4_CSUM : 2545 I40E_TX_DESC_CMD_IIPT_IPV4; 2546 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { 2547 cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; 2548 2549 exthdr = ip.hdr + sizeof(*ip.v6); 2550 l4_proto = ip.v6->nexthdr; 2551 if (l4.hdr != exthdr) 2552 ipv6_skip_exthdr(skb, exthdr - skb->data, 2553 &l4_proto, &frag_off); 2554 } 2555 2556 /* compute inner L3 header size */ 2557 offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; 2558 2559 /* Enable L4 checksum offloads */ 2560 switch (l4_proto) { 2561 case IPPROTO_TCP: 2562 /* enable checksum offloads */ 2563 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; 2564 offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 2565 break; 2566 case IPPROTO_SCTP: 2567 /* enable SCTP checksum offload */ 2568 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; 2569 offset |= (sizeof(struct sctphdr) >> 2) << 2570 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 2571 break; 2572 case IPPROTO_UDP: 2573 /* enable UDP checksum offload */ 2574 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; 2575 offset |= (sizeof(struct udphdr) >> 2) << 2576 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 2577 break; 2578 default: 2579 if (*tx_flags & I40E_TX_FLAGS_TSO) 2580 return -1; 2581 skb_checksum_help(skb); 2582 return 0; 2583 } 2584 2585 *td_cmd |= cmd; 2586 *td_offset |= offset; 2587 2588 return 1; 2589 } 2590 2591 /** 2592 * i40e_create_tx_ctx Build the Tx context descriptor 2593 * @tx_ring: ring to create the descriptor on 2594 * @cd_type_cmd_tso_mss: Quad Word 1 2595 * @cd_tunneling: Quad Word 0 - bits 0-31 2596 * @cd_l2tag2: Quad Word 0 - bits 32-63 2597 **/ 2598 static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, 2599 const u64 cd_type_cmd_tso_mss, 2600 const u32 cd_tunneling, const u32 cd_l2tag2) 2601 { 2602 struct i40e_tx_context_desc *context_desc; 2603 int i = tx_ring->next_to_use; 2604 2605 if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) && 2606 !cd_tunneling && !cd_l2tag2) 2607 return; 2608 2609 /* grab the next descriptor */ 2610 context_desc = I40E_TX_CTXTDESC(tx_ring, i); 2611 2612 i++; 2613 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 2614 2615 /* cpu_to_le32 and assign to struct fields */ 2616 context_desc->tunneling_params = cpu_to_le32(cd_tunneling); 2617 context_desc->l2tag2 = cpu_to_le16(cd_l2tag2); 2618 context_desc->rsvd = cpu_to_le16(0); 2619 context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss); 2620 } 2621 2622 /** 2623 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions 2624 * @tx_ring: the ring to be checked 2625 * @size: the size buffer we want to assure is available 2626 * 2627 * Returns -EBUSY if a stop is needed, else 0 2628 **/ 2629 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) 2630 { 2631 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); 2632 /* Memory barrier before checking head and tail */ 2633 smp_mb(); 2634 2635 /* Check again in a case another CPU has just made room available. */ 2636 if (likely(I40E_DESC_UNUSED(tx_ring) < size)) 2637 return -EBUSY; 2638 2639 /* A reprieve! - use start_queue because it doesn't call schedule */ 2640 netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index); 2641 ++tx_ring->tx_stats.restart_queue; 2642 return 0; 2643 } 2644 2645 /** 2646 * __i40e_chk_linearize - Check if there are more than 8 buffers per packet 2647 * @skb: send buffer 2648 * 2649 * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire 2650 * and so we need to figure out the cases where we need to linearize the skb. 
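* As a rough illustration (sizes chosen only for the example): with a
* gso_size of 1500 bytes and seven 200 byte fragments, no run of six
* consecutive fragments covers a full segment, so the running sum below
* goes negative and the skb must be linearized; with 300 byte fragments
* every such window covers a segment and the skb can be sent as-is.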
2651 *
2652 * For TSO we need to count the TSO header and segment payload separately.
2653 * As such we need to check cases where we have 7 fragments or more as we
2654 * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
2655 * the segment payload in the first descriptor, and another 7 for the
2656 * fragments.
2657 **/
2658 bool __i40e_chk_linearize(struct sk_buff *skb)
2659 {
2660 const struct skb_frag_struct *frag, *stale;
2661 int nr_frags, sum;
2662
2663 /* no need to check if number of frags is less than 7 */
2664 nr_frags = skb_shinfo(skb)->nr_frags;
2665 if (nr_frags < (I40E_MAX_BUFFER_TXD - 1))
2666 return false;
2667
2668 /* We need to walk through the list and validate that each group
2669 * of 6 fragments totals at least gso_size.
2670 */
2671 nr_frags -= I40E_MAX_BUFFER_TXD - 2;
2672 frag = &skb_shinfo(skb)->frags[0];
2673
2674 /* Initialize sum to the negative value of gso_size minus 1. We
2675 * use this as the worst case scenario in which the frag ahead
2676 * of us only provides one byte which is why we are limited to 6
2677 * descriptors for a single transmit as the header and previous
2678 * fragment are already consuming 2 descriptors.
2679 */
2680 sum = 1 - skb_shinfo(skb)->gso_size;
2681
2682 /* Add size of frags 0 through 4 to create our initial sum */
2683 sum += skb_frag_size(frag++);
2684 sum += skb_frag_size(frag++);
2685 sum += skb_frag_size(frag++);
2686 sum += skb_frag_size(frag++);
2687 sum += skb_frag_size(frag++);
2688
2689 /* Walk through fragments adding latest fragment, testing it, and
2690 * then removing stale fragments from the sum.
2691 */
2692 stale = &skb_shinfo(skb)->frags[0];
2693 for (;;) {
2694 sum += skb_frag_size(frag++);
2695
2696 /* if sum is negative we failed to make sufficient progress */
2697 if (sum < 0)
2698 return true;
2699
2700 if (!nr_frags--)
2701 break;
2702
2703 sum -= skb_frag_size(stale++);
2704 }
2705
2706 return false;
2707 }
2708
2709 /**
2710 * i40e_tx_map - Build the Tx descriptor
2711 * @tx_ring: ring to send buffer on
2712 * @skb: send buffer
2713 * @first: first buffer info buffer to use
2714 * @tx_flags: collected send information
2715 * @hdr_len: size of the packet header
2716 * @td_cmd: the command field in the descriptor
2717 * @td_offset: offset for checksum or crc
2718 **/
2719 #ifdef I40E_FCOE
2720 inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2721 struct i40e_tx_buffer *first, u32 tx_flags,
2722 const u8 hdr_len, u32 td_cmd, u32 td_offset)
2723 #else
2724 static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2725 struct i40e_tx_buffer *first, u32 tx_flags,
2726 const u8 hdr_len, u32 td_cmd, u32 td_offset)
2727 #endif
2728 {
2729 unsigned int data_len = skb->data_len;
2730 unsigned int size = skb_headlen(skb);
2731 struct skb_frag_struct *frag;
2732 struct i40e_tx_buffer *tx_bi;
2733 struct i40e_tx_desc *tx_desc;
2734 u16 i = tx_ring->next_to_use;
2735 u32 td_tag = 0;
2736 dma_addr_t dma;
2737 u16 desc_count = 1;
2738
2739 if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2740 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2741 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2742 I40E_TX_FLAGS_VLAN_SHIFT;
2743 }
2744
2745 first->tx_flags = tx_flags;
2746
2747 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2748
2749 tx_desc = I40E_TX_DESC(tx_ring, i);
2750 tx_bi = first;
2751
2752 for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
2753 unsigned int max_data = I40E_MAX_DATA_PER_TXD_ALIGNED;
2754
2755 if (dma_mapping_error(tx_ring->dev,
dma)) 2756 goto dma_error; 2757 2758 /* record length, and DMA address */ 2759 dma_unmap_len_set(tx_bi, len, size); 2760 dma_unmap_addr_set(tx_bi, dma, dma); 2761 2762 /* align size to end of page */ 2763 max_data += -dma & (I40E_MAX_READ_REQ_SIZE - 1); 2764 tx_desc->buffer_addr = cpu_to_le64(dma); 2765 2766 while (unlikely(size > I40E_MAX_DATA_PER_TXD)) { 2767 tx_desc->cmd_type_offset_bsz = 2768 build_ctob(td_cmd, td_offset, 2769 max_data, td_tag); 2770 2771 tx_desc++; 2772 i++; 2773 desc_count++; 2774 2775 if (i == tx_ring->count) { 2776 tx_desc = I40E_TX_DESC(tx_ring, 0); 2777 i = 0; 2778 } 2779 2780 dma += max_data; 2781 size -= max_data; 2782 2783 max_data = I40E_MAX_DATA_PER_TXD_ALIGNED; 2784 tx_desc->buffer_addr = cpu_to_le64(dma); 2785 } 2786 2787 if (likely(!data_len)) 2788 break; 2789 2790 tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, 2791 size, td_tag); 2792 2793 tx_desc++; 2794 i++; 2795 desc_count++; 2796 2797 if (i == tx_ring->count) { 2798 tx_desc = I40E_TX_DESC(tx_ring, 0); 2799 i = 0; 2800 } 2801 2802 size = skb_frag_size(frag); 2803 data_len -= size; 2804 2805 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, 2806 DMA_TO_DEVICE); 2807 2808 tx_bi = &tx_ring->tx_bi[i]; 2809 } 2810 2811 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); 2812 2813 i++; 2814 if (i == tx_ring->count) 2815 i = 0; 2816 2817 tx_ring->next_to_use = i; 2818 2819 i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); 2820 2821 /* write last descriptor with EOP bit */ 2822 td_cmd |= I40E_TX_DESC_CMD_EOP; 2823 2824 /* We can OR these values together as they both are checked against 2825 * 4 below and at this point desc_count will be used as a boolean value 2826 * after this if/else block. 2827 */ 2828 desc_count |= ++tx_ring->packet_stride; 2829 2830 /* Algorithm to optimize tail and RS bit setting: 2831 * if queue is stopped 2832 * mark RS bit 2833 * reset packet counter 2834 * else if xmit_more is supported and is true 2835 * advance packet counter to 4 2836 * reset desc_count to 0 2837 * 2838 * if desc_count >= 4 2839 * mark RS bit 2840 * reset packet counter 2841 * if desc_count > 0 2842 * update tail 2843 * 2844 * Note: If there are less than 4 descriptors 2845 * pending and interrupts were disabled the service task will 2846 * trigger a force WB. 2847 */ 2848 if (netif_xmit_stopped(txring_txq(tx_ring))) { 2849 goto do_rs; 2850 } else if (skb->xmit_more) { 2851 /* set stride to arm on next packet and reset desc_count */ 2852 tx_ring->packet_stride = WB_STRIDE; 2853 desc_count = 0; 2854 } else if (desc_count >= WB_STRIDE) { 2855 do_rs: 2856 /* write last descriptor with RS bit set */ 2857 td_cmd |= I40E_TX_DESC_CMD_RS; 2858 tx_ring->packet_stride = 0; 2859 } 2860 2861 tx_desc->cmd_type_offset_bsz = 2862 build_ctob(td_cmd, td_offset, size, td_tag); 2863 2864 /* Force memory writes to complete before letting h/w know there 2865 * are new descriptors to fetch. 2866 * 2867 * We also use this memory barrier to make certain all of the 2868 * status bits have been updated before next_to_watch is written. 
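* The Tx cleanup path keys off first->next_to_watch to decide that a
* packet's descriptors are in flight, so it must not observe that pointer
* before the descriptor writes above are visible.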
2869 */ 2870 wmb(); 2871 2872 /* set next_to_watch value indicating a packet is present */ 2873 first->next_to_watch = tx_desc; 2874 2875 /* notify HW of packet */ 2876 if (desc_count) { 2877 writel(i, tx_ring->tail); 2878 2879 /* we need this if more than one processor can write to our tail 2880 * at a time, it synchronizes IO on IA64/Altix systems 2881 */ 2882 mmiowb(); 2883 } 2884 2885 return; 2886 2887 dma_error: 2888 dev_info(tx_ring->dev, "TX DMA map failed\n"); 2889 2890 /* clear dma mappings for failed tx_bi map */ 2891 for (;;) { 2892 tx_bi = &tx_ring->tx_bi[i]; 2893 i40e_unmap_and_free_tx_resource(tx_ring, tx_bi); 2894 if (tx_bi == first) 2895 break; 2896 if (i == 0) 2897 i = tx_ring->count; 2898 i--; 2899 } 2900 2901 tx_ring->next_to_use = i; 2902 } 2903 2904 /** 2905 * i40e_xmit_frame_ring - Sends buffer on Tx ring 2906 * @skb: send buffer 2907 * @tx_ring: ring to send buffer on 2908 * 2909 * Returns NETDEV_TX_OK if sent, else an error code 2910 **/ 2911 static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, 2912 struct i40e_ring *tx_ring) 2913 { 2914 u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT; 2915 u32 cd_tunneling = 0, cd_l2tag2 = 0; 2916 struct i40e_tx_buffer *first; 2917 u32 td_offset = 0; 2918 u32 tx_flags = 0; 2919 __be16 protocol; 2920 u32 td_cmd = 0; 2921 u8 hdr_len = 0; 2922 int tso, count; 2923 int tsyn; 2924 2925 /* prefetch the data, we'll need it later */ 2926 prefetch(skb->data); 2927 2928 count = i40e_xmit_descriptor_count(skb); 2929 if (i40e_chk_linearize(skb, count)) { 2930 if (__skb_linearize(skb)) { 2931 dev_kfree_skb_any(skb); 2932 return NETDEV_TX_OK; 2933 } 2934 count = i40e_txd_use_count(skb->len); 2935 tx_ring->tx_stats.tx_linearize++; 2936 } 2937 2938 /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, 2939 * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, 2940 * + 4 desc gap to avoid the cache line where head is, 2941 * + 1 desc for context descriptor, 2942 * otherwise try next time 2943 */ 2944 if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { 2945 tx_ring->tx_stats.tx_busy++; 2946 return NETDEV_TX_BUSY; 2947 } 2948 2949 /* record the location of the first descriptor for this packet */ 2950 first = &tx_ring->tx_bi[tx_ring->next_to_use]; 2951 first->skb = skb; 2952 first->bytecount = skb->len; 2953 first->gso_segs = 1; 2954 2955 /* prepare the xmit flags */ 2956 if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) 2957 goto out_drop; 2958 2959 /* obtain protocol of skb */ 2960 protocol = vlan_get_protocol(skb); 2961 2962 /* setup IPv4/IPv6 offloads */ 2963 if (protocol == htons(ETH_P_IP)) 2964 tx_flags |= I40E_TX_FLAGS_IPV4; 2965 else if (protocol == htons(ETH_P_IPV6)) 2966 tx_flags |= I40E_TX_FLAGS_IPV6; 2967 2968 tso = i40e_tso(first, &hdr_len, &cd_type_cmd_tso_mss); 2969 2970 if (tso < 0) 2971 goto out_drop; 2972 else if (tso) 2973 tx_flags |= I40E_TX_FLAGS_TSO; 2974 2975 /* Always offload the checksum, since it's in the data descriptor */ 2976 tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, 2977 tx_ring, &cd_tunneling); 2978 if (tso < 0) 2979 goto out_drop; 2980 2981 tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss); 2982 2983 if (tsyn) 2984 tx_flags |= I40E_TX_FLAGS_TSYN; 2985 2986 skb_tx_timestamp(skb); 2987 2988 /* always enable CRC insertion offload */ 2989 td_cmd |= I40E_TX_DESC_CMD_ICRC; 2990 2991 i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, 2992 cd_tunneling, cd_l2tag2); 2993 2994 /* Add Flow Director ATR if it's enabled. 
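* ATR samples TCP SYN/FIN/RST packets (or roughly every atr_sample_rate-th
* packet of an established flow) and programs a filter so that receive
* traffic for the flow is steered to the same queue pair this transmit is
* using.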
2995 * 2996 * NOTE: this must always be directly before the data descriptor. 2997 */ 2998 i40e_atr(tx_ring, skb, tx_flags); 2999 3000 i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len, 3001 td_cmd, td_offset); 3002 3003 return NETDEV_TX_OK; 3004 3005 out_drop: 3006 dev_kfree_skb_any(first->skb); 3007 first->skb = NULL; 3008 return NETDEV_TX_OK; 3009 } 3010 3011 /** 3012 * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer 3013 * @skb: send buffer 3014 * @netdev: network interface device structure 3015 * 3016 * Returns NETDEV_TX_OK if sent, else an error code 3017 **/ 3018 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev) 3019 { 3020 struct i40e_netdev_priv *np = netdev_priv(netdev); 3021 struct i40e_vsi *vsi = np->vsi; 3022 struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping]; 3023 3024 /* hardware can't handle really short frames, hardware padding works 3025 * beyond this point 3026 */ 3027 if (skb_put_padto(skb, I40E_MIN_TX_LEN)) 3028 return NETDEV_TX_OK; 3029 3030 return i40e_xmit_frame_ring(skb, tx_ring); 3031 } 3032