/*******************************************************************************
 *
 * Intel Ethernet Controller XL710 Family Linux Driver
 * Copyright(c) 2013 - 2016 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Contact Information:
 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
 *
 ******************************************************************************/

#include <linux/prefetch.h>
#include <net/busy_poll.h>
#include "i40e.h"
#include "i40e_prototype.h"

static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
				u32 td_tag)
{
	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
			   ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
			   ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
			   ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
}

#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
/**
 * i40e_fdir - Generate a Flow Director descriptor based on fdata
 * @tx_ring: Tx ring to send buffer on
 * @fdata: Flow director filter data
 * @add: Indicate if we are adding a rule or deleting one
 *
 **/
static void i40e_fdir(struct i40e_ring *tx_ring,
		      struct i40e_fdir_filter *fdata, bool add)
{
	struct i40e_filter_program_desc *fdir_desc;
	struct i40e_pf *pf = tx_ring->vsi->back;
	u32 flex_ptype, dtype_cmd;
	u16 i;

	/* grab the next descriptor */
	i = tx_ring->next_to_use;
	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);

	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	flex_ptype = I40E_TXD_FLTR_QW0_QINDEX_MASK &
		     (fdata->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT);

	flex_ptype |= I40E_TXD_FLTR_QW0_FLEXOFF_MASK &
		      (fdata->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT);

	flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK &
		      (fdata->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);

	/* Use LAN VSI Id if not programmed by user */
	flex_ptype |= I40E_TXD_FLTR_QW0_DEST_VSI_MASK &
		      ((u32)(fdata->dest_vsi ? : pf->vsi[pf->lan_vsi]->id) <<
		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT);

	dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;

	dtype_cmd |= add ?
		     I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
		     I40E_TXD_FLTR_QW1_PCMD_SHIFT :
		     I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
		     I40E_TXD_FLTR_QW1_PCMD_SHIFT;

	dtype_cmd |= I40E_TXD_FLTR_QW1_DEST_MASK &
		     (fdata->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT);

	dtype_cmd |= I40E_TXD_FLTR_QW1_FD_STATUS_MASK &
		     (fdata->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT);

	if (fdata->cnt_index) {
		dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
		dtype_cmd |= I40E_TXD_FLTR_QW1_CNTINDEX_MASK &
			     ((u32)fdata->cnt_index <<
			      I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT);
	}

	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
	fdir_desc->rsvd = cpu_to_le32(0);
	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
	fdir_desc->fd_id = cpu_to_le32(fdata->fd_id);
}

#define I40E_FD_CLEAN_DELAY 10
/**
 * i40e_program_fdir_filter - Program a Flow Director filter
 * @fdir_data: Packet data that will be filter parameters
 * @raw_packet: the pre-allocated packet buffer for FDir
 * @pf: The PF pointer
 * @add: True for add/update, False for remove
 **/
static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data,
				    u8 *raw_packet, struct i40e_pf *pf,
				    bool add)
{
	struct i40e_tx_buffer *tx_buf, *first;
	struct i40e_tx_desc *tx_desc;
	struct i40e_ring *tx_ring;
	struct i40e_vsi *vsi;
	struct device *dev;
	dma_addr_t dma;
	u32 td_cmd = 0;
	u16 delay = 0;
	u16 i;

	/* find existing FDIR VSI */
	vsi = NULL;
	for (i = 0; i < pf->num_alloc_vsi; i++)
		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
			vsi = pf->vsi[i];
	if (!vsi)
		return -ENOENT;

	tx_ring = vsi->tx_rings[0];
	dev = tx_ring->dev;

	/* we need two descriptors to add/del a filter and we can wait */
	do {
		if (I40E_DESC_UNUSED(tx_ring) > 1)
			break;
		msleep_interruptible(1);
		delay++;
	} while (delay < I40E_FD_CLEAN_DELAY);

	if (!(I40E_DESC_UNUSED(tx_ring) > 1))
		return -EAGAIN;

	dma = dma_map_single(dev, raw_packet,
			     I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, dma))
		goto dma_fail;

	/* grab the next descriptor */
	i = tx_ring->next_to_use;
	first = &tx_ring->tx_bi[i];
	i40e_fdir(tx_ring, fdir_data, add);

	/* Now program a dummy descriptor */
	i = tx_ring->next_to_use;
	tx_desc = I40E_TX_DESC(tx_ring, i);
	tx_buf = &tx_ring->tx_bi[i];

	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;

	memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));

	/* record length, and DMA address */
	dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
	dma_unmap_addr_set(tx_buf, dma, dma);

	tx_desc->buffer_addr = cpu_to_le64(dma);
	td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;

	tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
	tx_buf->raw_buf = (void *)raw_packet;

	tx_desc->cmd_type_offset_bsz =
		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);

	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.
	 */
	wmb();

	/* Mark the data descriptor to be watched */
	first->next_to_watch = tx_desc;

	writel(tx_ring->next_to_use, tx_ring->tail);
	return 0;

dma_fail:
	return -1;
}

#define IP_HEADER_OFFSET 14
#define I40E_UDPIP_DUMMY_PACKET_LEN 42
/**
 * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
				   struct i40e_fdir_filter *fd_data,
				   bool add)
{
	struct i40e_pf *pf = vsi->back;
	struct udphdr *udp;
	struct iphdr *ip;
	bool err = false;
	u8 *raw_packet;
	int ret;
	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
		0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
	if (!raw_packet)
		return -ENOMEM;
	memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);

	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
	udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
	      + sizeof(struct iphdr));

	ip->daddr = fd_data->dst_ip[0];
	udp->dest = fd_data->dst_port;
	ip->saddr = fd_data->src_ip[0];
	udp->source = fd_data->src_port;

	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
	if (ret) {
		dev_info(&pf->pdev->dev,
			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
			 fd_data->pctype, fd_data->fd_id, ret);
		err = true;
	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
		if (add)
			dev_info(&pf->pdev->dev,
				 "Filter OK for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
		else
			dev_info(&pf->pdev->dev,
				 "Filter deleted for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
	}
	if (err)
		kfree(raw_packet);

	return err ? -EOPNOTSUPP : 0;
}

#define I40E_TCPIP_DUMMY_PACKET_LEN 54
/**
 * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
				   struct i40e_fdir_filter *fd_data,
				   bool add)
{
	struct i40e_pf *pf = vsi->back;
	struct tcphdr *tcp;
	struct iphdr *ip;
	bool err = false;
	u8 *raw_packet;
	int ret;
	/* Dummy packet */
	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
		0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
		0x0, 0x72, 0, 0, 0, 0};

	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
	if (!raw_packet)
		return -ENOMEM;
	memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);

	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
	tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
	      + sizeof(struct iphdr));

	ip->daddr = fd_data->dst_ip[0];
	tcp->dest = fd_data->dst_port;
	ip->saddr = fd_data->src_ip[0];
	tcp->source = fd_data->src_port;

	if (add) {
		pf->fd_tcp_rule++;
		if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
		    I40E_DEBUG_FD & pf->hw.debug_mask)
			dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
		pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
	} else {
		pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
				  (pf->fd_tcp_rule - 1) : 0;
		if (pf->fd_tcp_rule == 0) {
			if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
			    I40E_DEBUG_FD & pf->hw.debug_mask)
				dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
			pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
		}
	}

	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);

	if (ret) {
		dev_info(&pf->pdev->dev,
			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
			 fd_data->pctype, fd_data->fd_id, ret);
		err = true;
	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
		if (add)
			dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
		else
			dev_info(&pf->pdev->dev,
				 "Filter deleted for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
	}

	if (err)
		kfree(raw_packet);

	return err ? -EOPNOTSUPP : 0;
}

/**
 * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
 * a specific flow spec
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
				    struct i40e_fdir_filter *fd_data,
				    bool add)
{
	return -EOPNOTSUPP;
}

#define I40E_IP_DUMMY_PACKET_LEN 34
/**
 * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
 * a specific flow spec
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
				  struct i40e_fdir_filter *fd_data,
				  bool add)
{
	struct i40e_pf *pf = vsi->back;
	struct iphdr *ip;
	bool err = false;
	u8 *raw_packet;
	int ret;
	int i;
	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
		0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0};

	for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
	     i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
		raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
		if (!raw_packet)
			return -ENOMEM;
		memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
		ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);

		ip->saddr = fd_data->src_ip[0];
		ip->daddr = fd_data->dst_ip[0];
		ip->protocol = 0;

		fd_data->pctype = i;
		ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);

		if (ret) {
			dev_info(&pf->pdev->dev,
				 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
				 fd_data->pctype, fd_data->fd_id, ret);
			err = true;
		} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
			if (add)
				dev_info(&pf->pdev->dev,
					 "Filter OK for PCTYPE %d loc = %d\n",
					 fd_data->pctype, fd_data->fd_id);
			else
				dev_info(&pf->pdev->dev,
					 "Filter deleted for PCTYPE %d loc = %d\n",
					 fd_data->pctype, fd_data->fd_id);
		}
	}

	if (err)
		kfree(raw_packet);

	return err ? -EOPNOTSUPP : 0;
}

/**
 * i40e_add_del_fdir - Build raw packets to add/del fdir filter
 * @vsi: pointer to the targeted VSI
 * @input: flow director filter data to add or delete
 * @add: true adds a filter, false removes it
 *
 **/
int i40e_add_del_fdir(struct i40e_vsi *vsi,
		      struct i40e_fdir_filter *input, bool add)
{
	struct i40e_pf *pf = vsi->back;
	int ret;

	switch (input->flow_type & ~FLOW_EXT) {
	case TCP_V4_FLOW:
		ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
		break;
	case UDP_V4_FLOW:
		ret = i40e_add_del_fdir_udpv4(vsi, input, add);
		break;
	case SCTP_V4_FLOW:
		ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
		break;
	case IPV4_FLOW:
		ret = i40e_add_del_fdir_ipv4(vsi, input, add);
		break;
	case IP_USER_FLOW:
		switch (input->ip4_proto) {
		case IPPROTO_TCP:
			ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
			break;
		case IPPROTO_UDP:
			ret = i40e_add_del_fdir_udpv4(vsi, input, add);
			break;
		case IPPROTO_SCTP:
			ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
			break;
		default:
			ret = i40e_add_del_fdir_ipv4(vsi, input, add);
			break;
		}
		break;
	default:
		dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
			 input->flow_type);
		ret = -EINVAL;
	}

	/* The buffer allocated here is freed by the i40e_clean_tx_ring() */
	return ret;
}

/**
 * i40e_fd_handle_status - check the Programming Status for FD
 * @rx_ring: the Rx ring for this descriptor
 * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
 * @prog_id: the id originally used for programming
 *
 * This is used to verify if the FD programming or invalidation
 * requested by SW to the HW is successful or not and take actions accordingly.
 **/
static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
				  union i40e_rx_desc *rx_desc, u8 prog_id)
{
	struct i40e_pf *pf = rx_ring->vsi->back;
	struct pci_dev *pdev = pf->pdev;
	u32 fcnt_prog, fcnt_avail;
	u32 error;
	u64 qw;

	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;

	if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
		pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
		if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
		    (I40E_DEBUG_FD & pf->hw.debug_mask))
			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
				 pf->fd_inv);

		/* Check if the programming error is for ATR.
		 * If so, auto disable ATR and set a state for
		 * flush in progress. Next time we come here if flush is in
		 * progress do nothing, once flush is complete the state will
		 * be cleared.
		 */
		if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
			return;

		pf->fd_add_err++;
		/* store the current atr filter count */
		pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);

		if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
		    (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
			pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
			set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
		}

		/* filter programming failed most likely due to table full */
		fcnt_prog = i40e_get_global_fd_count(pf);
		fcnt_avail = pf->fdir_pf_filter_count;
		/* If ATR is running fcnt_prog can quickly change,
		 * if we are very close to full, it makes sense to disable
		 * FD ATR/SB and then re-enable it when there is room.
		 */
		if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
			if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
			    !(pf->auto_disable_flags &
			      I40E_FLAG_FD_SB_ENABLED)) {
				if (I40E_DEBUG_FD & pf->hw.debug_mask)
					dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
				pf->auto_disable_flags |=
							I40E_FLAG_FD_SB_ENABLED;
			}
		}
	} else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
		if (I40E_DEBUG_FD & pf->hw.debug_mask)
			dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
				 rx_desc->wb.qword0.hi_dword.fd_id);
	}
}

/**
 * i40e_unmap_and_free_tx_resource - Release a Tx buffer
 * @ring: the ring that owns the buffer
 * @tx_buffer: the buffer to free
 **/
static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
					    struct i40e_tx_buffer *tx_buffer)
{
	if (tx_buffer->skb) {
		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
			kfree(tx_buffer->raw_buf);
		else
			dev_kfree_skb_any(tx_buffer->skb);
		if (dma_unmap_len(tx_buffer, len))
			dma_unmap_single(ring->dev,
					 dma_unmap_addr(tx_buffer, dma),
					 dma_unmap_len(tx_buffer, len),
					 DMA_TO_DEVICE);
	} else if (dma_unmap_len(tx_buffer, len)) {
		dma_unmap_page(ring->dev,
			       dma_unmap_addr(tx_buffer, dma),
			       dma_unmap_len(tx_buffer, len),
			       DMA_TO_DEVICE);
	}

	tx_buffer->next_to_watch = NULL;
	tx_buffer->skb = NULL;
	dma_unmap_len_set(tx_buffer, len, 0);
	/* tx_buffer must be completely set up in the transmit path */
}

/**
 * i40e_clean_tx_ring - Free any empty Tx buffers
 * @tx_ring: ring to be cleaned
 **/
void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
{
	unsigned long bi_size;
	u16 i;

	/* ring already cleared, nothing to do */
	if (!tx_ring->tx_bi)
		return;

	/* Free all the Tx ring sk_buffs */
	for (i = 0; i < tx_ring->count; i++)
		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);

	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
	memset(tx_ring->tx_bi, 0, bi_size);

	/* Zero out the descriptor ring */
	memset(tx_ring->desc, 0, tx_ring->size);

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;

	if (!tx_ring->netdev)
		return;

	/* cleanup Tx queue statistics */
	netdev_tx_reset_queue(txring_txq(tx_ring));
}

/**
 * i40e_free_tx_resources - Free Tx resources per queue
 * @tx_ring: Tx descriptor ring for a specific queue
 *
 * Free all transmit software resources
 **/
void i40e_free_tx_resources(struct i40e_ring *tx_ring)
{
	i40e_clean_tx_ring(tx_ring);
	kfree(tx_ring->tx_bi);
	tx_ring->tx_bi = NULL;

	if (tx_ring->desc) {
		dma_free_coherent(tx_ring->dev, tx_ring->size,
				  tx_ring->desc, tx_ring->dma);
		tx_ring->desc = NULL;
	}
}

/**
 * i40e_get_tx_pending - how many tx descriptors not processed
 * @tx_ring: the ring of descriptors
 * @in_sw: is tx_pending being checked in SW or HW
 *
 * Since there is no access to the ring head register
 * in XL710, we need to use our local copies
 **/
u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
{
	u32 head, tail;

	if (!in_sw)
		head = i40e_get_head(ring);
	else
		head = ring->next_to_clean;
	tail = readl(ring->tail);

	if (head != tail)
		return (head < tail) ?
			tail - head : (tail + ring->count - head);

	return 0;
}

#define WB_STRIDE 0x3

/**
 * i40e_clean_tx_irq - Reclaim resources after transmit completes
 * @vsi: the VSI we care about
 * @tx_ring: Tx ring to clean
 * @napi_budget: Used to determine if we are in netpoll
 *
 * Returns true if there's any budget left (e.g. the clean is finished)
 **/
static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
			      struct i40e_ring *tx_ring, int napi_budget)
{
	u16 i = tx_ring->next_to_clean;
	struct i40e_tx_buffer *tx_buf;
	struct i40e_tx_desc *tx_head;
	struct i40e_tx_desc *tx_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int budget = vsi->work_limit;

	tx_buf = &tx_ring->tx_bi[i];
	tx_desc = I40E_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));

	do {
		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		read_barrier_depends();

		/* we have caught up to head, no work left to do */
		if (tx_head == tx_desc)
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buf->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buf->bytecount;
		total_packets += tx_buf->gso_segs;

		/* free the skb */
		napi_consume_skb(tx_buf->skb, napi_budget);

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 dma_unmap_addr(tx_buf, dma),
				 dma_unmap_len(tx_buf, len),
				 DMA_TO_DEVICE);

		/* clear tx_buffer data */
		tx_buf->skb = NULL;
		dma_unmap_len_set(tx_buf, len, 0);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {

			tx_buf++;
			tx_desc++;
			i++;
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buf = tx_ring->tx_bi;
				tx_desc = I40E_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buf, len)) {
				dma_unmap_page(tx_ring->dev,
					       dma_unmap_addr(tx_buf, dma),
					       dma_unmap_len(tx_buf, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buf, len, 0);
			}
		}

		/* move us one more past the eop_desc for start of next pkt */
		tx_buf++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buf = tx_ring->tx_bi;
			tx_desc = I40E_TX_DESC(tx_ring, 0);
		}

		prefetch(tx_desc);

		/* update budget accounting */
		budget--;
	} while (likely(budget));

	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->syncp);
	tx_ring->q_vector->tx.total_bytes += total_bytes;
	tx_ring->q_vector->tx.total_packets += total_packets;

	if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
		/* check to see if there are < 4 descriptors
		 * waiting to be written back, then kick the hardware to force
		 * them to be written back in case we stay in NAPI.
		 * In this mode on X722 we do not enable Interrupt.
		 */
		unsigned int j = i40e_get_tx_pending(tx_ring, false);

		if (budget &&
		    ((j / (WB_STRIDE + 1)) == 0) && (j != 0) &&
		    !test_bit(__I40E_DOWN, &vsi->state) &&
		    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
			tx_ring->arm_wb = true;
	}

	/* notify netdev of completed buffers */
	netdev_tx_completed_queue(txring_txq(tx_ring),
				  total_packets, total_bytes);

#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !test_bit(__I40E_DOWN, &vsi->state)) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);
			++tx_ring->tx_stats.restart_queue;
		}
	}

	return !!budget;
}

/**
 * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
 * @vsi: the VSI we care about
 * @q_vector: the vector on which to enable writeback
 *
 **/
static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
				  struct i40e_q_vector *q_vector)
{
	u16 flags = q_vector->tx.ring[0].flags;
	u32 val;

	if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR))
		return;

	if (q_vector->arm_wb_state)
		return;

	if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
		val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK |
		      I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */

		wr32(&vsi->back->hw,
		     I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
		     val);
	} else {
		val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK |
		      I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */

		wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
	}
	q_vector->arm_wb_state = true;
}

/**
 * i40e_force_wb - Issue SW Interrupt so HW does a wb
 * @vsi: the VSI we care about
 * @q_vector: the vector on which to force writeback
 *
 **/
void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
{
	if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
		u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
			  I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
			  I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
			  I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
			  /* allow 00 to be written to the index */

		wr32(&vsi->back->hw,
		     I40E_PFINT_DYN_CTLN(q_vector->v_idx +
					 vsi->base_vector - 1), val);
	} else {
		u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
			  I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
			  I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
			  I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
			  /* allow 00 to be written to the index */

		wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
	}
}

/**
 * i40e_set_new_dynamic_itr - Find new ITR level
 * @rc: structure containing ring performance data
 *
 * Returns true if ITR changed, false if not
 *
 * Stores a new ITR value based on packets and byte counts during
 * the last interrupt. The advantage of per interrupt computation
 * is faster updates and more accurate ITR for the current traffic
 * pattern. Constants in this function were computed based on
 * theoretical maximum wire speed and thresholds were set based on
 * testing data as well as attempting to minimize response time
 * while increasing bulk throughput.
 **/
static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
{
	enum i40e_latency_range new_latency_range = rc->latency_range;
	struct i40e_q_vector *qv = rc->ring->q_vector;
	u32 new_itr = rc->itr;
	int bytes_per_int;
	int usecs;

	if (rc->total_packets == 0 || !rc->itr)
		return false;

	/* simple throttlerate management
	 *   0-10MB/s   lowest (50000 ints/s)
	 *  10-20MB/s   low    (20000 ints/s)
	 *  20-1249MB/s bulk   (18000 ints/s)
	 *  > 40000 Rx packets per second (8000 ints/s)
	 *
	 * The math works out because the divisor is in 10^(-6) which
	 * turns the bytes/us input value into MB/s values, but
	 * make sure to use usecs, as the register values written
	 * are in 2 usec increments in the ITR registers, and make sure
	 * to use the smoothed values that the countdown timer gives us.
	 */
	usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
	bytes_per_int = rc->total_bytes / usecs;

	switch (new_latency_range) {
	case I40E_LOWEST_LATENCY:
		if (bytes_per_int > 10)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	case I40E_LOW_LATENCY:
		if (bytes_per_int > 20)
			new_latency_range = I40E_BULK_LATENCY;
		else if (bytes_per_int <= 10)
			new_latency_range = I40E_LOWEST_LATENCY;
		break;
	case I40E_BULK_LATENCY:
	case I40E_ULTRA_LATENCY:
	default:
		if (bytes_per_int <= 20)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	}

	/* this is to adjust RX more aggressively when streaming small
	 * packets. The value of 40000 was picked as it is just beyond
	 * what the hardware can receive per second if in low latency
	 * mode.
	 */
#define RX_ULTRA_PACKET_RATE 40000

	if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) &&
	    (&qv->rx == rc))
		new_latency_range = I40E_ULTRA_LATENCY;

	rc->latency_range = new_latency_range;

	switch (new_latency_range) {
	case I40E_LOWEST_LATENCY:
		new_itr = I40E_ITR_50K;
		break;
	case I40E_LOW_LATENCY:
		new_itr = I40E_ITR_20K;
		break;
	case I40E_BULK_LATENCY:
		new_itr = I40E_ITR_18K;
		break;
	case I40E_ULTRA_LATENCY:
		new_itr = I40E_ITR_8K;
		break;
	default:
		break;
	}

	rc->total_bytes = 0;
	rc->total_packets = 0;

	if (new_itr != rc->itr) {
		rc->itr = new_itr;
		return true;
	}

	return false;
}

/**
 * i40e_clean_programming_status - clean the programming status descriptor
 * @rx_ring: the rx ring that has this descriptor
 * @rx_desc: the rx descriptor written back by HW
 *
 * Flow director should handle FD_FILTER_STATUS to check its filter programming
 * status being successful or not and take actions accordingly. FCoE should
 * handle its context/filter programming/invalidation status and take actions.
 *
 **/
static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
					  union i40e_rx_desc *rx_desc)
{
	u64 qw;
	u8 id;

	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
	     I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;

	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
		i40e_fd_handle_status(rx_ring, rx_desc, id);
#ifdef I40E_FCOE
	else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
		 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
		i40e_fcoe_handle_status(rx_ring, rx_desc, id);
#endif
}

/**
 * i40e_setup_tx_descriptors - Allocate the Tx descriptors
 * @tx_ring: the tx ring to set up
 *
 * Return 0 on success, negative on error
 **/
int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
{
	struct device *dev = tx_ring->dev;
	int bi_size;

	if (!dev)
		return -ENOMEM;

	/* warn if we are about to overwrite the pointer */
	WARN_ON(tx_ring->tx_bi);
	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
	if (!tx_ring->tx_bi)
		goto err;

	/* round up to nearest 4K */
	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
	/* add u32 for head writeback, align after this takes care of
	 * guaranteeing this is at least one cache line in size
	 */
	tx_ring->size += sizeof(u32);
	tx_ring->size = ALIGN(tx_ring->size, 4096);
	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
					   &tx_ring->dma, GFP_KERNEL);
	if (!tx_ring->desc) {
		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
			 tx_ring->size);
		goto err;
	}

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
	return 0;

err:
	kfree(tx_ring->tx_bi);
	tx_ring->tx_bi = NULL;
	return -ENOMEM;
}

/**
 * i40e_clean_rx_ring - Free Rx buffers
 * @rx_ring: ring to be cleaned
 **/
void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	unsigned long bi_size;
	u16 i;

	/* ring already cleared, nothing to do */
	if (!rx_ring->rx_bi)
		return;

	/* Free all the Rx ring sk_buffs */
	for (i = 0; i < rx_ring->count; i++) {
		struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];

		if (rx_bi->skb) {
			dev_kfree_skb(rx_bi->skb);
			rx_bi->skb = NULL;
		}
		if (!rx_bi->page)
			continue;

		dma_unmap_page(dev, rx_bi->dma, PAGE_SIZE, DMA_FROM_DEVICE);
		__free_pages(rx_bi->page, 0);

		rx_bi->page = NULL;
		rx_bi->page_offset = 0;
	}

	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
	memset(rx_ring->rx_bi, 0, bi_size);

	/* Zero out the descriptor ring */
	memset(rx_ring->desc, 0, rx_ring->size);

	rx_ring->next_to_alloc = 0;
	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;
}

/**
 * i40e_free_rx_resources - Free Rx resources
 * @rx_ring: ring to clean the resources from
 *
 * Free all receive software resources
 **/
void i40e_free_rx_resources(struct i40e_ring *rx_ring)
{
	i40e_clean_rx_ring(rx_ring);
	kfree(rx_ring->rx_bi);
	rx_ring->rx_bi = NULL;

	if (rx_ring->desc) {
		dma_free_coherent(rx_ring->dev, rx_ring->size,
				  rx_ring->desc, rx_ring->dma);
		rx_ring->desc = NULL;
	}
}

/**
 * i40e_setup_rx_descriptors - Allocate Rx descriptors
 * @rx_ring: Rx descriptor ring (for a specific queue) to setup
 *
 * Returns 0 on success, negative on failure
 **/
int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	int bi_size;

	/* warn if we are about to overwrite the pointer */
	WARN_ON(rx_ring->rx_bi);
	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
	if (!rx_ring->rx_bi)
		goto err;

	u64_stats_init(&rx_ring->syncp);

	/* Round up to nearest 4K */
	rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc);
	rx_ring->size = ALIGN(rx_ring->size, 4096);
	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
					   &rx_ring->dma, GFP_KERNEL);

	if (!rx_ring->desc) {
		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
			 rx_ring->size);
		goto err;
	}

	rx_ring->next_to_alloc = 0;
	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;

	return 0;
err:
	kfree(rx_ring->rx_bi);
	rx_ring->rx_bi = NULL;
	return -ENOMEM;
}

/**
 * i40e_release_rx_desc - Store the new tail and head values
 * @rx_ring: ring to bump
 * @val: new head index
 **/
static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
{
	rx_ring->next_to_use = val;

	/* update next to alloc since we have filled the ring */
	rx_ring->next_to_alloc = val;

	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch. (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64).
	 */
	wmb();
	writel(val, rx_ring->tail);
}

/**
 * i40e_alloc_mapped_page - recycle or make a new page
 * @rx_ring: ring to use
 * @bi: rx_buffer struct to modify
 *
 * Returns true if the page was successfully allocated or
 * reused.
 **/
static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
				   struct i40e_rx_buffer *bi)
{
	struct page *page = bi->page;
	dma_addr_t dma;

	/* since we are recycling buffers we should seldom need to alloc */
	if (likely(page)) {
		rx_ring->rx_stats.page_reuse_count++;
		return true;
	}

	/* alloc new page for storage */
	page = dev_alloc_page();
	if (unlikely(!page)) {
		rx_ring->rx_stats.alloc_page_failed++;
		return false;
	}

	/* map page for use */
	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);

	/* if mapping failed free memory back to system since
	 * there isn't much point in holding memory we can't use
	 */
	if (dma_mapping_error(rx_ring->dev, dma)) {
		__free_pages(page, 0);
		rx_ring->rx_stats.alloc_page_failed++;
		return false;
	}

	bi->dma = dma;
	bi->page = page;
	bi->page_offset = 0;

	return true;
}

/**
 * i40e_receive_skb - Send a completed packet up the stack
 * @rx_ring: rx ring in play
 * @skb: packet to send up
 * @vlan_tag: vlan tag for packet
 **/
static void i40e_receive_skb(struct i40e_ring *rx_ring,
			     struct sk_buff *skb, u16 vlan_tag)
{
	struct i40e_q_vector *q_vector = rx_ring->q_vector;

	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
	    (vlan_tag & VLAN_VID_MASK))
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);

	napi_gro_receive(&q_vector->napi, skb);
}

/**
 * i40e_alloc_rx_buffers - Replace used receive buffers
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 *
 * Returns false if all allocations were successful, true if any fail
 **/
bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
{
	u16 ntu = rx_ring->next_to_use;
	union i40e_rx_desc *rx_desc;
	struct i40e_rx_buffer *bi;

	/* do nothing if no valid netdev defined */
	if (!rx_ring->netdev || !cleaned_count)
		return false;

	rx_desc = I40E_RX_DESC(rx_ring, ntu);
	bi = &rx_ring->rx_bi[ntu];

	do {
		if (!i40e_alloc_mapped_page(rx_ring, bi))
			goto no_buffers;

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
		rx_desc->read.hdr_addr = 0;

		rx_desc++;
		bi++;
		ntu++;
		if (unlikely(ntu == rx_ring->count)) {
			rx_desc = I40E_RX_DESC(rx_ring, 0);
			bi = rx_ring->rx_bi;
			ntu = 0;
		}

		/* clear the status bits for the next_to_use descriptor */
		rx_desc->wb.qword1.status_error_len = 0;

		cleaned_count--;
	} while (cleaned_count);

	if (rx_ring->next_to_use != ntu)
		i40e_release_rx_desc(rx_ring, ntu);

	return false;

no_buffers:
	if (rx_ring->next_to_use != ntu)
		i40e_release_rx_desc(rx_ring, ntu);

	/* make sure to come back via polling to try again after
	 * allocation failure
	 */
	return true;
}

/**
 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
 * @vsi: the VSI we care about
 * @skb: skb currently being received and modified
 * @rx_desc: the receive descriptor
 *
 * skb->protocol must be set before this function is called
 **/
static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
				    struct sk_buff *skb,
				    union i40e_rx_desc *rx_desc)
{
	struct i40e_rx_ptype_decoded decoded;
	u32 rx_error, rx_status;
	bool ipv4, ipv6;
	u8 ptype;
	u64 qword;

	qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT;
	rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
		   I40E_RXD_QW1_ERROR_SHIFT;
	rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
		    I40E_RXD_QW1_STATUS_SHIFT;
	decoded = decode_rx_desc_ptype(ptype);

	skb->ip_summed = CHECKSUM_NONE;

	skb_checksum_none_assert(skb);

	/* Rx csum enabled and ip headers found? */
	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
		return;

	/* did the hardware decode the packet and checksum? */
	if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
		return;

	/* both known and outer_ip must be set for the below code to work */
	if (!(decoded.known && decoded.outer_ip))
		return;

	ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
	       (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4);
	ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
	       (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6);

	if (ipv4 &&
	    (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
			 BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
		goto checksum_fail;

	/* likely incorrect csum if alternate IP extension headers found */
	if (ipv6 &&
	    rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
		/* don't increment checksum err here, non-fatal err */
		return;

	/* there was some L4 error, count error and punt packet to the stack */
	if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
		goto checksum_fail;

	/* handle packets that were not able to be checksummed due
	 * to arrival speed, in this case the stack can compute
	 * the csum.
	 */
	if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
		return;

	/* If there is an outer header present that might contain a checksum
	 * we need to bump the checksum level by 1 to reflect the fact that
	 * we are indicating we validated the inner checksum.
	 */
	if (decoded.tunnel_type >= I40E_RX_PTYPE_TUNNEL_IP_GRENAT)
		skb->csum_level = 1;

	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
	switch (decoded.inner_prot) {
	case I40E_RX_PTYPE_INNER_PROT_TCP:
	case I40E_RX_PTYPE_INNER_PROT_UDP:
	case I40E_RX_PTYPE_INNER_PROT_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		/* fall through */
	default:
		break;
	}

	return;

checksum_fail:
	vsi->back->hw_csum_rx_error++;
}

/**
 * i40e_ptype_to_htype - get a hash type
 * @ptype: the ptype value from the descriptor
 *
 * Returns a hash type to be used by skb_set_hash
 **/
static inline int i40e_ptype_to_htype(u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);

	if (!decoded.known)
		return PKT_HASH_TYPE_NONE;

	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
		return PKT_HASH_TYPE_L4;
	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
		return PKT_HASH_TYPE_L3;
	else
		return PKT_HASH_TYPE_L2;
}

/**
 * i40e_rx_hash - set the hash value in the skb
 * @ring: descriptor ring
 * @rx_desc: specific descriptor
 **/
static inline void i40e_rx_hash(struct i40e_ring *ring,
				union i40e_rx_desc *rx_desc,
				struct sk_buff *skb,
				u8 rx_ptype)
{
	u32 hash;
	const __le64 rss_mask =
		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);

	if (!(ring->netdev->features & NETIF_F_RXHASH))
		return;

	if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
		hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
		skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype));
	}
}

/**
 * i40e_process_skb_fields - Populate skb header fields from Rx descriptor
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @rx_desc: pointer to the EOP Rx descriptor
 * @skb: pointer to current skb being populated
 * @rx_ptype: the packet type decoded by hardware
 *
 * This function checks the ring, descriptor, and packet information in
 * order to populate the hash, checksum, VLAN, protocol, and
 * other fields within the skb.
 **/
static inline
void i40e_process_skb_fields(struct i40e_ring *rx_ring,
			     union i40e_rx_desc *rx_desc, struct sk_buff *skb,
			     u8 rx_ptype)
{
	u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	u32 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
			I40E_RXD_QW1_STATUS_SHIFT;
	u32 rsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
		   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT;

	if (unlikely(rsyn)) {
		i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, rsyn);
		rx_ring->last_rx_timestamp = jiffies;
	}

	i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);

	/* modifies the skb - consumes the enet header */
	skb->protocol = eth_type_trans(skb, rx_ring->netdev);

	i40e_rx_checksum(rx_ring->vsi, skb, rx_desc);

	skb_record_rx_queue(skb, rx_ring->queue_index);
}

/**
 * i40e_pull_tail - i40e specific version of skb_pull_tail
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @skb: pointer to current skb being adjusted
 *
 * This function is an i40e specific version of __pskb_pull_tail. The
 * main difference between this version and the original function is that
 * this function can make several assumptions about the state of things
 * that allow for significant optimizations versus the standard function.
 * As a result we can do things like drop a frag and maintain an accurate
 * truesize for the skb.
 */
static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb)
{
	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
	unsigned char *va;
	unsigned int pull_len;

	/* it is valid to use page_address instead of kmap since we are
	 * working with pages allocated out of the lomem pool per
	 * alloc_page(GFP_ATOMIC)
	 */
	va = skb_frag_address(frag);

	/* we need the header to contain the greater of either ETH_HLEN or
	 * 60 bytes if the skb->len is less than 60 for skb_pad.
	 */
	pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE);

	/* align pull length to size of long to optimize memcpy performance */
	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));

	/* update all of the pointers */
	skb_frag_size_sub(frag, pull_len);
	frag->page_offset += pull_len;
	skb->data_len -= pull_len;
	skb->tail += pull_len;
}

/**
 * i40e_cleanup_headers - Correct empty headers
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @skb: pointer to current skb being fixed
 *
 * Also address the case where we are pulling data in on pages only
 * and as such no data is present in the skb header.
 *
 * In addition if skb is not at least 60 bytes we need to pad it so that
 * it is large enough to qualify as a valid Ethernet frame.
 *
 * Returns true if an error was encountered and skb was freed.
 **/
static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb)
{
	/* place header in linear portion of buffer */
	if (skb_is_nonlinear(skb))
		i40e_pull_tail(rx_ring, skb);

	/* if eth_skb_pad returns an error the skb was freed */
	if (eth_skb_pad(skb))
		return true;

	return false;
}

/**
 * i40e_reuse_rx_page - page flip buffer and store it back on the ring
 * @rx_ring: rx descriptor ring to store buffers on
 * @old_buff: donor buffer to have page reused
 *
 * Synchronizes page for reuse by the adapter
 **/
static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
			       struct i40e_rx_buffer *old_buff)
{
	struct i40e_rx_buffer *new_buff;
	u16 nta = rx_ring->next_to_alloc;

	new_buff = &rx_ring->rx_bi[nta];

	/* update, and store next to alloc */
	nta++;
	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;

	/* transfer page from old buffer to new buffer */
	*new_buff = *old_buff;
}

/**
 * i40e_page_is_reserved - check if reuse is possible
 * @page: page struct to check
 */
static inline bool i40e_page_is_reserved(struct page *page)
{
	return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
}

/**
 * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff
 * @rx_ring: rx descriptor ring to transact packets on
 * @rx_buffer: buffer containing page to add
 * @rx_desc: descriptor containing length of buffer written by hardware
 * @skb: sk_buff to place the data into
 *
 * This function will add the data contained in rx_buffer->page to the skb.
 * This is done either through a direct copy if the data in the buffer is
 * less than the skb header size, otherwise it will just attach the page as
 * a frag to the skb.
 *
 * The function will then update the page offset if necessary and return
 * true if the buffer can be reused by the adapter.
 **/
static bool i40e_add_rx_frag(struct i40e_ring *rx_ring,
			     struct i40e_rx_buffer *rx_buffer,
			     union i40e_rx_desc *rx_desc,
			     struct sk_buff *skb)
{
	struct page *page = rx_buffer->page;
	u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	unsigned int size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
			    I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
#if (PAGE_SIZE < 8192)
	unsigned int truesize = I40E_RXBUFFER_2048;
#else
	unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
	unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
#endif

	/* will the data fit in the skb we allocated? if so, just
	 * copy it as it is pretty small anyway
	 */
	if ((size <= I40E_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) {
		unsigned char *va = page_address(page) + rx_buffer->page_offset;

		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));

		/* page is not reserved, we can reuse buffer as-is */
		if (likely(!i40e_page_is_reserved(page)))
			return true;

		/* this page cannot be reused so discard it */
		__free_pages(page, 0);
		return false;
	}

	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
			rx_buffer->page_offset, size, truesize);

	/* avoid re-using remote pages */
	if (unlikely(i40e_page_is_reserved(page)))
		return false;

#if (PAGE_SIZE < 8192)
	/* if we are only owner of page we can reuse it */
	if (unlikely(page_count(page) != 1))
		return false;

	/* flip page offset to other buffer */
	rx_buffer->page_offset ^= truesize;
#else
	/* move offset up to the next cache line */
	rx_buffer->page_offset += truesize;

	if (rx_buffer->page_offset > last_offset)
		return false;
#endif

	/* Even if we own the page, we are not allowed to use atomic_set()
	 * This would break get_page_unless_zero() users.
	 */
	get_page(rx_buffer->page);

	return true;
}

/**
 * i40e_fetch_rx_buffer - Allocate skb and populate it
 * @rx_ring: rx descriptor ring to transact packets on
 * @rx_desc: descriptor containing info written by hardware
 *
 * This function allocates an skb on the fly, and populates it with the page
 * data from the current receive descriptor, taking care to set up the skb
 * correctly, as well as handling calling the page recycle function if
 * necessary.
 */
static inline
struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring *rx_ring,
				     union i40e_rx_desc *rx_desc)
{
	struct i40e_rx_buffer *rx_buffer;
	struct sk_buff *skb;
	struct page *page;

	rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
	page = rx_buffer->page;
	prefetchw(page);

	skb = rx_buffer->skb;

	if (likely(!skb)) {
		void *page_addr = page_address(page) + rx_buffer->page_offset;

		/* prefetch first cache line of first page */
		prefetch(page_addr);
#if L1_CACHE_BYTES < 128
		prefetch(page_addr + L1_CACHE_BYTES);
#endif

		/* allocate a skb to store the frags */
		skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
				       I40E_RX_HDR_SIZE,
				       GFP_ATOMIC | __GFP_NOWARN);
		if (unlikely(!skb)) {
			rx_ring->rx_stats.alloc_buff_failed++;
			return NULL;
		}

		/* we will be copying header into skb->data in
		 * pskb_may_pull so it is in our interest to prefetch
		 * it now to avoid a possible cache miss
		 */
		prefetchw(skb->data);
	} else {
		rx_buffer->skb = NULL;
	}

	/* we are reusing so sync this buffer for CPU use */
	dma_sync_single_range_for_cpu(rx_ring->dev,
				      rx_buffer->dma,
				      rx_buffer->page_offset,
				      I40E_RXBUFFER_2048,
				      DMA_FROM_DEVICE);

	/* pull page into skb */
	if (i40e_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
		/* hand second half of page back to the ring */
		i40e_reuse_rx_page(rx_ring, rx_buffer);
		rx_ring->rx_stats.page_reuse_count++;
	} else {
		/* we are not reusing the buffer so unmap it */
		dma_unmap_page(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
			       DMA_FROM_DEVICE);
	}

	/* clear contents of buffer_info */
	rx_buffer->page = NULL;

	return skb;
}

/**
 * i40e_is_non_eop - process handling of non-EOP buffers
 * @rx_ring: Rx ring being processed
 * @rx_desc: Rx descriptor for current buffer
 * @skb: Current socket buffer containing buffer in progress
 *
 * This function updates next to clean. If the buffer is an EOP buffer
 * this function exits returning false, otherwise it will place the
 * sk_buff in the next buffer to be chained and return true indicating
 * that this is in fact a non-EOP buffer.
 **/
static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
			    union i40e_rx_desc *rx_desc,
			    struct sk_buff *skb)
{
	u32 ntc = rx_ring->next_to_clean + 1;

	/* fetch, update, and store next to clean */
	ntc = (ntc < rx_ring->count) ? ntc : 0;
	rx_ring->next_to_clean = ntc;

	prefetch(I40E_RX_DESC(rx_ring, ntc));

#define staterrlen rx_desc->wb.qword1.status_error_len
	if (unlikely(i40e_rx_is_programming_status(le64_to_cpu(staterrlen)))) {
		i40e_clean_programming_status(rx_ring, rx_desc);
		rx_ring->rx_bi[ntc].skb = skb;
		return true;
	}
	/* if we are the last buffer then there is nothing else to do */
#define I40E_RXD_EOF BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)
	if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF)))
		return false;

	/* place skb in next buffer to be received */
	rx_ring->rx_bi[ntc].skb = skb;
	rx_ring->rx_stats.non_eop_descs++;

	return true;
}

/**
 * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
 * @rx_ring: rx descriptor ring to transact packets on
 * @budget: Total limit on number of packets to process
 *
 * This function provides a "bounce buffer" approach to Rx interrupt
 * processing. The advantage to this is that on systems that have
 * expensive overhead for IOMMU access this provides a means of avoiding
 * it by maintaining the mapping of the page to the system.
 *
 * Returns amount of work completed
 **/
static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
	bool failure = false;

	while (likely(total_rx_packets < budget)) {
		union i40e_rx_desc *rx_desc;
		struct sk_buff *skb;
		u32 rx_status;
		u16 vlan_tag;
		u8 rx_ptype;
		u64 qword;

		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
			failure = failure ||
				  i40e_alloc_rx_buffers(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);

		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
			   I40E_RXD_QW1_PTYPE_SHIFT;
		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
			    I40E_RXD_QW1_STATUS_SHIFT;

		if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
			break;

		/* status_error_len will always be zero for unused descriptors
		 * because it's cleared in cleanup, and overlaps with hdr_addr
		 * which is always zero because packet split isn't used, if the
		 * hardware wrote DD then it will be non-zero
		 */
		if (!rx_desc->wb.qword1.status_error_len)
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * DD bit is set.
1804 */ 1805 dma_rmb(); 1806 1807 skb = i40e_fetch_rx_buffer(rx_ring, rx_desc); 1808 if (!skb) 1809 break; 1810 1811 cleaned_count++; 1812 1813 if (i40e_is_non_eop(rx_ring, rx_desc, skb)) 1814 continue; 1815 1816 /* ERR_MASK will only have valid bits if EOP set, and 1817 * what we are doing here is actually checking 1818 * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in 1819 * the error field 1820 */ 1821 if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) { 1822 dev_kfree_skb_any(skb); 1823 continue; 1824 } 1825 1826 if (i40e_cleanup_headers(rx_ring, skb)) 1827 continue; 1828 1829 /* probably a little skewed due to removing CRC */ 1830 total_rx_bytes += skb->len; 1831 1832 /* populate checksum, VLAN, and protocol */ 1833 i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); 1834 1835 #ifdef I40E_FCOE 1836 if (unlikely( 1837 i40e_rx_is_fcoe(rx_ptype) && 1838 !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) { 1839 dev_kfree_skb_any(skb); 1840 continue; 1841 } 1842 #endif 1843 1844 vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ? 1845 le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; 1846 1847 i40e_receive_skb(rx_ring, skb, vlan_tag); 1848 1849 /* update budget accounting */ 1850 total_rx_packets++; 1851 } 1852 1853 u64_stats_update_begin(&rx_ring->syncp); 1854 rx_ring->stats.packets += total_rx_packets; 1855 rx_ring->stats.bytes += total_rx_bytes; 1856 u64_stats_update_end(&rx_ring->syncp); 1857 rx_ring->q_vector->rx.total_packets += total_rx_packets; 1858 rx_ring->q_vector->rx.total_bytes += total_rx_bytes; 1859 1860 /* guarantee a trip back through this routine if there was a failure */ 1861 return failure ? budget : total_rx_packets; 1862 } 1863 1864 static u32 i40e_buildreg_itr(const int type, const u16 itr) 1865 { 1866 u32 val; 1867 1868 val = I40E_PFINT_DYN_CTLN_INTENA_MASK | 1869 /* Don't clear PBA because that can cause lost interrupts that 1870 * came in while we were cleaning/polling 1871 */ 1872 (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | 1873 (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT); 1874 1875 return val; 1876 } 1877 1878 /* a small macro to shorten up some long lines */ 1879 #define INTREG I40E_PFINT_DYN_CTLN 1880 static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx) 1881 { 1882 return !!(vsi->rx_rings[idx]->rx_itr_setting); 1883 } 1884 1885 static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx) 1886 { 1887 return !!(vsi->tx_rings[idx]->tx_itr_setting); 1888 } 1889 1890 /** 1891 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt 1892 * @vsi: the VSI we care about 1893 * @q_vector: q_vector for which itr is being updated and interrupt enabled 1894 * 1895 **/ 1896 static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, 1897 struct i40e_q_vector *q_vector) 1898 { 1899 struct i40e_hw *hw = &vsi->back->hw; 1900 bool rx = false, tx = false; 1901 u32 rxval, txval; 1902 int vector; 1903 int idx = q_vector->v_idx; 1904 int rx_itr_setting, tx_itr_setting; 1905 1906 vector = (q_vector->v_idx + vsi->base_vector); 1907 1908 /* avoid dynamic calculation if in countdown mode OR if 1909 * all dynamic is disabled 1910 */ 1911 rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); 1912 1913 rx_itr_setting = get_rx_itr_enabled(vsi, idx); 1914 tx_itr_setting = get_tx_itr_enabled(vsi, idx); 1915 1916 if (q_vector->itr_countdown > 0 || 1917 (!ITR_IS_DYNAMIC(rx_itr_setting) && 1918 !ITR_IS_DYNAMIC(tx_itr_setting))) { 1919 goto enable_int; 1920 } 1921 1922 if (ITR_IS_DYNAMIC(rx_itr_setting)) {
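/* adaptive (dynamic) ITR: derive a fresh Rx interval from the traffic observed since the last update */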
1923 rx = i40e_set_new_dynamic_itr(&q_vector->rx); 1924 rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); 1925 } 1926 1927 if (ITR_IS_DYNAMIC(tx_itr_setting)) { 1928 tx = i40e_set_new_dynamic_itr(&q_vector->tx); 1929 txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); 1930 } 1931 1932 if (rx || tx) { 1933 /* get the higher of the two ITR adjustments and 1934 * use the same value for both ITR registers 1935 * when in adaptive mode (Rx and/or Tx) 1936 */ 1937 u16 itr = max(q_vector->tx.itr, q_vector->rx.itr); 1938 1939 q_vector->tx.itr = q_vector->rx.itr = itr; 1940 txval = i40e_buildreg_itr(I40E_TX_ITR, itr); 1941 tx = true; 1942 rxval = i40e_buildreg_itr(I40E_RX_ITR, itr); 1943 rx = true; 1944 } 1945 1946 /* only need to enable the interrupt once, but need 1947 * to possibly update both ITR values 1948 */ 1949 if (rx) { 1950 /* set the INTENA_MSK_MASK so that this first write 1951 * won't actually enable the interrupt, instead just 1952 * updating the ITR (it's bit 31 PF and VF) 1953 */ 1954 rxval |= BIT(31); 1955 /* don't check _DOWN because interrupt isn't being enabled */ 1956 wr32(hw, INTREG(vector - 1), rxval); 1957 } 1958 1959 enable_int: 1960 if (!test_bit(__I40E_DOWN, &vsi->state)) 1961 wr32(hw, INTREG(vector - 1), txval); 1962 1963 if (q_vector->itr_countdown) 1964 q_vector->itr_countdown--; 1965 else 1966 q_vector->itr_countdown = ITR_COUNTDOWN_START; 1967 } 1968 1969 /** 1970 * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine 1971 * @napi: napi struct with our devices info in it 1972 * @budget: amount of work driver is allowed to do this pass, in packets 1973 * 1974 * This function will clean all queues associated with a q_vector. 1975 * 1976 * Returns the amount of work done 1977 **/ 1978 int i40e_napi_poll(struct napi_struct *napi, int budget) 1979 { 1980 struct i40e_q_vector *q_vector = 1981 container_of(napi, struct i40e_q_vector, napi); 1982 struct i40e_vsi *vsi = q_vector->vsi; 1983 struct i40e_ring *ring; 1984 bool clean_complete = true; 1985 bool arm_wb = false; 1986 int budget_per_ring; 1987 int work_done = 0; 1988 1989 if (test_bit(__I40E_DOWN, &vsi->state)) { 1990 napi_complete(napi); 1991 return 0; 1992 } 1993 1994 /* Clear hung_detected bit */ 1995 clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected); 1996 /* Since the actual Tx work is minimal, we can give the Tx a larger 1997 * budget and be more aggressive about cleaning up the Tx descriptors. 1998 */ 1999 i40e_for_each_ring(ring, q_vector->tx) { 2000 if (!i40e_clean_tx_irq(vsi, ring, budget)) { 2001 clean_complete = false; 2002 continue; 2003 } 2004 arm_wb |= ring->arm_wb; 2005 ring->arm_wb = false; 2006 } 2007 2008 /* Handle case where we are called by netpoll with a budget of 0 */ 2009 if (budget <= 0) 2010 goto tx_only; 2011 2012 /* We attempt to distribute budget to each Rx queue fairly, but don't 2013 * allow the budget to go below 1 because that would exit polling early. 
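* e.g. with the usual NAPI weight of 64 and four ring pairs, each Rx ring below is polled with a budget of 16.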
2014 */ 2015 budget_per_ring = max(budget/q_vector->num_ringpairs, 1); 2016 2017 i40e_for_each_ring(ring, q_vector->rx) { 2018 int cleaned = i40e_clean_rx_irq(ring, budget_per_ring); 2019 2020 work_done += cleaned; 2021 /* if we clean as many as budgeted, we must not be done */ 2022 if (cleaned >= budget_per_ring) 2023 clean_complete = false; 2024 } 2025 2026 /* If work not completed, return budget and polling will return */ 2027 if (!clean_complete) { 2028 tx_only: 2029 if (arm_wb) { 2030 q_vector->tx.ring[0].tx_stats.tx_force_wb++; 2031 i40e_enable_wb_on_itr(vsi, q_vector); 2032 } 2033 return budget; 2034 } 2035 2036 if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR) 2037 q_vector->arm_wb_state = false; 2038 2039 /* Work is done so exit the polling mode and re-enable the interrupt */ 2040 napi_complete_done(napi, work_done); 2041 if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { 2042 i40e_update_enable_itr(vsi, q_vector); 2043 } else { /* Legacy mode */ 2044 i40e_irq_dynamic_enable_icr0(vsi->back, false); 2045 } 2046 return 0; 2047 } 2048 2049 /** 2050 * i40e_atr - Add a Flow Director ATR filter 2051 * @tx_ring: ring to add programming descriptor to 2052 * @skb: send buffer 2053 * @tx_flags: send tx flags 2054 **/ 2055 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, 2056 u32 tx_flags) 2057 { 2058 struct i40e_filter_program_desc *fdir_desc; 2059 struct i40e_pf *pf = tx_ring->vsi->back; 2060 union { 2061 unsigned char *network; 2062 struct iphdr *ipv4; 2063 struct ipv6hdr *ipv6; 2064 } hdr; 2065 struct tcphdr *th; 2066 unsigned int hlen; 2067 u32 flex_ptype, dtype_cmd; 2068 int l4_proto; 2069 u16 i; 2070 2071 /* make sure ATR is enabled */ 2072 if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED)) 2073 return; 2074 2075 if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) 2076 return; 2077 2078 /* if sampling is disabled do nothing */ 2079 if (!tx_ring->atr_sample_rate) 2080 return; 2081 2082 /* Currently only IPv4/IPv6 with TCP is supported */ 2083 if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6))) 2084 return; 2085 2086 /* snag network header to get L4 type and address */ 2087 hdr.network = (tx_flags & I40E_TX_FLAGS_UDP_TUNNEL) ? 2088 skb_inner_network_header(skb) : skb_network_header(skb); 2089 2090 /* Note: tx_flags gets modified to reflect inner protocols in 2091 * tx_enable_csum function if encap is enabled. 2092 */ 2093 if (tx_flags & I40E_TX_FLAGS_IPV4) { 2094 /* access ihl as u8 to avoid unaligned access on ia64 */ 2095 hlen = (hdr.network[0] & 0x0F) << 2; 2096 l4_proto = hdr.ipv4->protocol; 2097 } else { 2098 hlen = hdr.network - skb->data; 2099 l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL); 2100 hlen -= hdr.network - skb->data; 2101 } 2102 2103 if (l4_proto != IPPROTO_TCP) 2104 return; 2105 2106 th = (struct tcphdr *)(hdr.network + hlen); 2107 2108 /* Due to lack of space, no more new filters can be programmed */ 2109 if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) 2110 return; 2111 if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) && 2112 (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) { 2113 /* HW ATR eviction will take care of removing filters on FIN 2114 * and RST packets. 
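* In that case nothing needs to be programmed for the flow here, so the packet is sent without adding a filter descriptor.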
2115 */ 2116 if (th->fin || th->rst) 2117 return; 2118 } 2119 2120 tx_ring->atr_count++; 2121 2122 /* sample on all syn/fin/rst packets or once every atr sample rate */ 2123 if (!th->fin && 2124 !th->syn && 2125 !th->rst && 2126 (tx_ring->atr_count < tx_ring->atr_sample_rate)) 2127 return; 2128 2129 tx_ring->atr_count = 0; 2130 2131 /* grab the next descriptor */ 2132 i = tx_ring->next_to_use; 2133 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); 2134 2135 i++; 2136 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 2137 2138 flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & 2139 I40E_TXD_FLTR_QW0_QINDEX_MASK; 2140 flex_ptype |= (tx_flags & I40E_TX_FLAGS_IPV4) ? 2141 (I40E_FILTER_PCTYPE_NONF_IPV4_TCP << 2142 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) : 2143 (I40E_FILTER_PCTYPE_NONF_IPV6_TCP << 2144 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT); 2145 2146 flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT; 2147 2148 dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG; 2149 2150 dtype_cmd |= (th->fin || th->rst) ? 2151 (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << 2152 I40E_TXD_FLTR_QW1_PCMD_SHIFT) : 2153 (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE << 2154 I40E_TXD_FLTR_QW1_PCMD_SHIFT); 2155 2156 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX << 2157 I40E_TXD_FLTR_QW1_DEST_SHIFT; 2158 2159 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID << 2160 I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT; 2161 2162 dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; 2163 if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL)) 2164 dtype_cmd |= 2165 ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) << 2166 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & 2167 I40E_TXD_FLTR_QW1_CNTINDEX_MASK; 2168 else 2169 dtype_cmd |= 2170 ((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) << 2171 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & 2172 I40E_TXD_FLTR_QW1_CNTINDEX_MASK; 2173 2174 if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) && 2175 (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) 2176 dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK; 2177 2178 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); 2179 fdir_desc->rsvd = cpu_to_le32(0); 2180 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd); 2181 fdir_desc->fd_id = cpu_to_le32(0); 2182 } 2183 2184 /** 2185 * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW 2186 * @skb: send buffer 2187 * @tx_ring: ring to send buffer on 2188 * @flags: the tx flags to be set 2189 * 2190 * Checks the skb and set up correspondingly several generic transmit flags 2191 * related to VLAN tagging for the HW, such as VLAN, DCB, etc. 2192 * 2193 * Returns error code indicate the frame should be dropped upon error and the 2194 * otherwise returns 0 to indicate the flags has been set properly. 2195 **/ 2196 #ifdef I40E_FCOE 2197 inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, 2198 struct i40e_ring *tx_ring, 2199 u32 *flags) 2200 #else 2201 static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, 2202 struct i40e_ring *tx_ring, 2203 u32 *flags) 2204 #endif 2205 { 2206 __be16 protocol = skb->protocol; 2207 u32 tx_flags = 0; 2208 2209 if (protocol == htons(ETH_P_8021Q) && 2210 !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) { 2211 /* When HW VLAN acceleration is turned off by the user the 2212 * stack sets the protocol to 8021q so that the driver 2213 * can take any steps required to support the SW only 2214 * VLAN handling. In our case the driver doesn't need 2215 * to take any further steps so just set the protocol 2216 * to the encapsulated ethertype. 
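* (vlan_get_protocol() returns the EtherType of the frame encapsulated behind the 802.1Q header.)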
2217 */ 2218 skb->protocol = vlan_get_protocol(skb); 2219 goto out; 2220 } 2221 2222 /* if we have a HW VLAN tag being added, default to the HW one */ 2223 if (skb_vlan_tag_present(skb)) { 2224 tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; 2225 tx_flags |= I40E_TX_FLAGS_HW_VLAN; 2226 /* else if it is a SW VLAN, check the next protocol and store the tag */ 2227 } else if (protocol == htons(ETH_P_8021Q)) { 2228 struct vlan_hdr *vhdr, _vhdr; 2229 2230 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr); 2231 if (!vhdr) 2232 return -EINVAL; 2233 2234 protocol = vhdr->h_vlan_encapsulated_proto; 2235 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT; 2236 tx_flags |= I40E_TX_FLAGS_SW_VLAN; 2237 } 2238 2239 if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED)) 2240 goto out; 2241 2242 /* Insert 802.1p priority into VLAN header */ 2243 if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) || 2244 (skb->priority != TC_PRIO_CONTROL)) { 2245 tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK; 2246 tx_flags |= (skb->priority & 0x7) << 2247 I40E_TX_FLAGS_VLAN_PRIO_SHIFT; 2248 if (tx_flags & I40E_TX_FLAGS_SW_VLAN) { 2249 struct vlan_ethhdr *vhdr; 2250 int rc; 2251 2252 rc = skb_cow_head(skb, 0); 2253 if (rc < 0) 2254 return rc; 2255 vhdr = (struct vlan_ethhdr *)skb->data; 2256 vhdr->h_vlan_TCI = htons(tx_flags >> 2257 I40E_TX_FLAGS_VLAN_SHIFT); 2258 } else { 2259 tx_flags |= I40E_TX_FLAGS_HW_VLAN; 2260 } 2261 } 2262 2263 out: 2264 *flags = tx_flags; 2265 return 0; 2266 } 2267 2268 /** 2269 * i40e_tso - set up the tso context descriptor 2270 * @skb: ptr to the skb we're sending 2271 * @hdr_len: ptr to the size of the packet header 2272 * @cd_type_cmd_tso_mss: Quad Word 1 2273 * 2274 * Returns 0 if no TSO can happen, 1 if tso is going, or error 2275 **/ 2276 static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) 2277 { 2278 u64 cd_cmd, cd_tso_len, cd_mss; 2279 union { 2280 struct iphdr *v4; 2281 struct ipv6hdr *v6; 2282 unsigned char *hdr; 2283 } ip; 2284 union { 2285 struct tcphdr *tcp; 2286 struct udphdr *udp; 2287 unsigned char *hdr; 2288 } l4; 2289 u32 paylen, l4_offset; 2290 int err; 2291 2292 if (skb->ip_summed != CHECKSUM_PARTIAL) 2293 return 0; 2294 2295 if (!skb_is_gso(skb)) 2296 return 0; 2297 2298 err = skb_cow_head(skb, 0); 2299 if (err < 0) 2300 return err; 2301 2302 ip.hdr = skb_network_header(skb); 2303 l4.hdr = skb_transport_header(skb); 2304 2305 /* initialize outer IP header fields */ 2306 if (ip.v4->version == 4) { 2307 ip.v4->tot_len = 0; 2308 ip.v4->check = 0; 2309 } else { 2310 ip.v6->payload_len = 0; 2311 } 2312 2313 if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | 2314 SKB_GSO_GRE_CSUM | 2315 SKB_GSO_IPXIP4 | 2316 SKB_GSO_IPXIP6 | 2317 SKB_GSO_UDP_TUNNEL | 2318 SKB_GSO_UDP_TUNNEL_CSUM)) { 2319 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && 2320 (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) { 2321 l4.udp->len = 0; 2322 2323 /* determine offset of outer transport header */ 2324 l4_offset = l4.hdr - skb->data; 2325 2326 /* remove payload length from outer checksum */ 2327 paylen = skb->len - l4_offset; 2328 csum_replace_by_diff(&l4.udp->check, htonl(paylen)); 2329 } 2330 2331 /* reset pointers to inner headers */ 2332 ip.hdr = skb_inner_network_header(skb); 2333 l4.hdr = skb_inner_transport_header(skb); 2334 2335 /* initialize inner IP header fields */ 2336 if (ip.v4->version == 4) { 2337 ip.v4->tot_len = 0; 2338 ip.v4->check = 0; 2339 } else { 2340 ip.v6->payload_len = 0; 2341 } 2342 } 2343 2344 
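/* The stack seeds tcp->check with a pseudo-header checksum that includes the total payload length; the device expects that length removed since it rebuilds the checksum for every segment, which is what the csum_replace_by_diff() below accounts for. */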
/* determine offset of inner transport header */ 2345 l4_offset = l4.hdr - skb->data; 2346 2347 /* remove payload length from inner checksum */ 2348 paylen = skb->len - l4_offset; 2349 csum_replace_by_diff(&l4.tcp->check, htonl(paylen)); 2350 2351 /* compute length of segmentation header */ 2352 *hdr_len = (l4.tcp->doff * 4) + l4_offset; 2353 2354 /* find the field values */ 2355 cd_cmd = I40E_TX_CTX_DESC_TSO; 2356 cd_tso_len = skb->len - *hdr_len; 2357 cd_mss = skb_shinfo(skb)->gso_size; 2358 *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | 2359 (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | 2360 (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); 2361 return 1; 2362 } 2363 2364 /** 2365 * i40e_tsyn - set up the tsyn context descriptor 2366 * @tx_ring: ptr to the ring to send 2367 * @skb: ptr to the skb we're sending 2368 * @tx_flags: the collected send information 2369 * @cd_type_cmd_tso_mss: Quad Word 1 2370 * 2371 * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen 2372 **/ 2373 static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb, 2374 u32 tx_flags, u64 *cd_type_cmd_tso_mss) 2375 { 2376 struct i40e_pf *pf; 2377 2378 if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) 2379 return 0; 2380 2381 /* Tx timestamps cannot be sampled when doing TSO */ 2382 if (tx_flags & I40E_TX_FLAGS_TSO) 2383 return 0; 2384 2385 /* only timestamp the outbound packet if the user has requested it and 2386 * we are not already transmitting a packet to be timestamped 2387 */ 2388 pf = i40e_netdev_to_pf(tx_ring->netdev); 2389 if (!(pf->flags & I40E_FLAG_PTP)) 2390 return 0; 2391 2392 if (pf->ptp_tx && 2393 !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) { 2394 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 2395 pf->ptp_tx_skb = skb_get(skb); 2396 } else { 2397 return 0; 2398 } 2399 2400 *cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN << 2401 I40E_TXD_CTX_QW1_CMD_SHIFT; 2402 2403 return 1; 2404 } 2405 2406 /** 2407 * i40e_tx_enable_csum - Enable Tx checksum offloads 2408 * @skb: send buffer 2409 * @tx_flags: pointer to Tx flags currently set 2410 * @td_cmd: Tx descriptor command bits to set 2411 * @td_offset: Tx descriptor header offsets to set 2412 * @tx_ring: Tx descriptor ring 2413 * @cd_tunneling: ptr to context desc bits 2414 **/ 2415 static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, 2416 u32 *td_cmd, u32 *td_offset, 2417 struct i40e_ring *tx_ring, 2418 u32 *cd_tunneling) 2419 { 2420 union { 2421 struct iphdr *v4; 2422 struct ipv6hdr *v6; 2423 unsigned char *hdr; 2424 } ip; 2425 union { 2426 struct tcphdr *tcp; 2427 struct udphdr *udp; 2428 unsigned char *hdr; 2429 } l4; 2430 unsigned char *exthdr; 2431 u32 offset, cmd = 0; 2432 __be16 frag_off; 2433 u8 l4_proto = 0; 2434 2435 if (skb->ip_summed != CHECKSUM_PARTIAL) 2436 return 0; 2437 2438 ip.hdr = skb_network_header(skb); 2439 l4.hdr = skb_transport_header(skb); 2440 2441 /* compute outer L2 header size */ 2442 offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; 2443 2444 if (skb->encapsulation) { 2445 u32 tunnel = 0; 2446 /* define outer network header type */ 2447 if (*tx_flags & I40E_TX_FLAGS_IPV4) { 2448 tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ? 
2449 I40E_TX_CTX_EXT_IP_IPV4 : 2450 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; 2451 2452 l4_proto = ip.v4->protocol; 2453 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { 2454 tunnel |= I40E_TX_CTX_EXT_IP_IPV6; 2455 2456 exthdr = ip.hdr + sizeof(*ip.v6); 2457 l4_proto = ip.v6->nexthdr; 2458 if (l4.hdr != exthdr) 2459 ipv6_skip_exthdr(skb, exthdr - skb->data, 2460 &l4_proto, &frag_off); 2461 } 2462 2463 /* define outer transport */ 2464 switch (l4_proto) { 2465 case IPPROTO_UDP: 2466 tunnel |= I40E_TXD_CTX_UDP_TUNNELING; 2467 *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; 2468 break; 2469 case IPPROTO_GRE: 2470 tunnel |= I40E_TXD_CTX_GRE_TUNNELING; 2471 *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; 2472 break; 2473 case IPPROTO_IPIP: 2474 case IPPROTO_IPV6: 2475 *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; 2476 l4.hdr = skb_inner_network_header(skb); 2477 break; 2478 default: 2479 if (*tx_flags & I40E_TX_FLAGS_TSO) 2480 return -1; 2481 2482 skb_checksum_help(skb); 2483 return 0; 2484 } 2485 2486 /* compute outer L3 header size */ 2487 tunnel |= ((l4.hdr - ip.hdr) / 4) << 2488 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; 2489 2490 /* switch IP header pointer from outer to inner header */ 2491 ip.hdr = skb_inner_network_header(skb); 2492 2493 /* compute tunnel header size */ 2494 tunnel |= ((ip.hdr - l4.hdr) / 2) << 2495 I40E_TXD_CTX_QW0_NATLEN_SHIFT; 2496 2497 /* indicate if we need to offload outer UDP header */ 2498 if ((*tx_flags & I40E_TX_FLAGS_TSO) && 2499 !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && 2500 (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) 2501 tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK; 2502 2503 /* record tunnel offload values */ 2504 *cd_tunneling |= tunnel; 2505 2506 /* switch L4 header pointer from outer to inner */ 2507 l4.hdr = skb_inner_transport_header(skb); 2508 l4_proto = 0; 2509 2510 /* reset type as we transition from outer to inner headers */ 2511 *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6); 2512 if (ip.v4->version == 4) 2513 *tx_flags |= I40E_TX_FLAGS_IPV4; 2514 if (ip.v6->version == 6) 2515 *tx_flags |= I40E_TX_FLAGS_IPV6; 2516 } 2517 2518 /* Enable IP checksum offloads */ 2519 if (*tx_flags & I40E_TX_FLAGS_IPV4) { 2520 l4_proto = ip.v4->protocol; 2521 /* the stack computes the IP header already, the only time we 2522 * need the hardware to recompute it is in the case of TSO. 2523 */ 2524 cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ? 
2525 I40E_TX_DESC_CMD_IIPT_IPV4_CSUM : 2526 I40E_TX_DESC_CMD_IIPT_IPV4; 2527 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { 2528 cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; 2529 2530 exthdr = ip.hdr + sizeof(*ip.v6); 2531 l4_proto = ip.v6->nexthdr; 2532 if (l4.hdr != exthdr) 2533 ipv6_skip_exthdr(skb, exthdr - skb->data, 2534 &l4_proto, &frag_off); 2535 } 2536 2537 /* compute inner L3 header size */ 2538 offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; 2539 2540 /* Enable L4 checksum offloads */ 2541 switch (l4_proto) { 2542 case IPPROTO_TCP: 2543 /* enable checksum offloads */ 2544 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; 2545 offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 2546 break; 2547 case IPPROTO_SCTP: 2548 /* enable SCTP checksum offload */ 2549 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; 2550 offset |= (sizeof(struct sctphdr) >> 2) << 2551 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 2552 break; 2553 case IPPROTO_UDP: 2554 /* enable UDP checksum offload */ 2555 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; 2556 offset |= (sizeof(struct udphdr) >> 2) << 2557 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 2558 break; 2559 default: 2560 if (*tx_flags & I40E_TX_FLAGS_TSO) 2561 return -1; 2562 skb_checksum_help(skb); 2563 return 0; 2564 } 2565 2566 *td_cmd |= cmd; 2567 *td_offset |= offset; 2568 2569 return 1; 2570 } 2571 2572 /** 2573 * i40e_create_tx_ctx Build the Tx context descriptor 2574 * @tx_ring: ring to create the descriptor on 2575 * @cd_type_cmd_tso_mss: Quad Word 1 2576 * @cd_tunneling: Quad Word 0 - bits 0-31 2577 * @cd_l2tag2: Quad Word 0 - bits 32-63 2578 **/ 2579 static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, 2580 const u64 cd_type_cmd_tso_mss, 2581 const u32 cd_tunneling, const u32 cd_l2tag2) 2582 { 2583 struct i40e_tx_context_desc *context_desc; 2584 int i = tx_ring->next_to_use; 2585 2586 if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) && 2587 !cd_tunneling && !cd_l2tag2) 2588 return; 2589 2590 /* grab the next descriptor */ 2591 context_desc = I40E_TX_CTXTDESC(tx_ring, i); 2592 2593 i++; 2594 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 2595 2596 /* cpu_to_le32 and assign to struct fields */ 2597 context_desc->tunneling_params = cpu_to_le32(cd_tunneling); 2598 context_desc->l2tag2 = cpu_to_le16(cd_l2tag2); 2599 context_desc->rsvd = cpu_to_le16(0); 2600 context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss); 2601 } 2602 2603 /** 2604 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions 2605 * @tx_ring: the ring to be checked 2606 * @size: the size buffer we want to assure is available 2607 * 2608 * Returns -EBUSY if a stop is needed, else 0 2609 **/ 2610 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) 2611 { 2612 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); 2613 /* Memory barrier before checking head and tail */ 2614 smp_mb(); 2615 2616 /* Check again in a case another CPU has just made room available. */ 2617 if (likely(I40E_DESC_UNUSED(tx_ring) < size)) 2618 return -EBUSY; 2619 2620 /* A reprieve! - use start_queue because it doesn't call schedule */ 2621 netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index); 2622 ++tx_ring->tx_stats.restart_queue; 2623 return 0; 2624 } 2625 2626 /** 2627 * __i40e_chk_linearize - Check if there are more than 8 buffers per packet 2628 * @skb: send buffer 2629 * 2630 * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire 2631 * and so we need to figure out the cases where we need to linearize the skb. 
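* (I40E_MAX_BUFFER_TXD reflects this 8 descriptor per packet limit.)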
2632 * 2633 * For TSO we need to count the TSO header and segment payload separately. 2634 * As such we need to check cases where we have 7 fragments or more as we 2635 * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for 2636 * the segment payload in the first descriptor, and another 7 for the 2637 * fragments. 2638 **/ 2639 bool __i40e_chk_linearize(struct sk_buff *skb) 2640 { 2641 const struct skb_frag_struct *frag, *stale; 2642 int nr_frags, sum; 2643 2644 /* no need to check if number of frags is less than 7 */ 2645 nr_frags = skb_shinfo(skb)->nr_frags; 2646 if (nr_frags < (I40E_MAX_BUFFER_TXD - 1)) 2647 return false; 2648 2649 /* We need to walk through the list and validate that each group 2650 * of 6 fragments totals at least gso_size. 2651 */ 2652 nr_frags -= I40E_MAX_BUFFER_TXD - 2; 2653 frag = &skb_shinfo(skb)->frags[0]; 2654 2655 /* Initialize size to the negative value of gso_size minus 1. We 2656 * use this as the worst case scenerio in which the frag ahead 2657 * of us only provides one byte which is why we are limited to 6 2658 * descriptors for a single transmit as the header and previous 2659 * fragment are already consuming 2 descriptors. 2660 */ 2661 sum = 1 - skb_shinfo(skb)->gso_size; 2662 2663 /* Add size of frags 0 through 4 to create our initial sum */ 2664 sum += skb_frag_size(frag++); 2665 sum += skb_frag_size(frag++); 2666 sum += skb_frag_size(frag++); 2667 sum += skb_frag_size(frag++); 2668 sum += skb_frag_size(frag++); 2669 2670 /* Walk through fragments adding latest fragment, testing it, and 2671 * then removing stale fragments from the sum. 2672 */ 2673 stale = &skb_shinfo(skb)->frags[0]; 2674 for (;;) { 2675 sum += skb_frag_size(frag++); 2676 2677 /* if sum is negative we failed to make sufficient progress */ 2678 if (sum < 0) 2679 return true; 2680 2681 if (!nr_frags--) 2682 break; 2683 2684 sum -= skb_frag_size(stale++); 2685 } 2686 2687 return false; 2688 } 2689 2690 /** 2691 * i40e_tx_map - Build the Tx descriptor 2692 * @tx_ring: ring to send buffer on 2693 * @skb: send buffer 2694 * @first: first buffer info buffer to use 2695 * @tx_flags: collected send information 2696 * @hdr_len: size of the packet header 2697 * @td_cmd: the command field in the descriptor 2698 * @td_offset: offset for checksum or crc 2699 **/ 2700 #ifdef I40E_FCOE 2701 inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, 2702 struct i40e_tx_buffer *first, u32 tx_flags, 2703 const u8 hdr_len, u32 td_cmd, u32 td_offset) 2704 #else 2705 static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, 2706 struct i40e_tx_buffer *first, u32 tx_flags, 2707 const u8 hdr_len, u32 td_cmd, u32 td_offset) 2708 #endif 2709 { 2710 unsigned int data_len = skb->data_len; 2711 unsigned int size = skb_headlen(skb); 2712 struct skb_frag_struct *frag; 2713 struct i40e_tx_buffer *tx_bi; 2714 struct i40e_tx_desc *tx_desc; 2715 u16 i = tx_ring->next_to_use; 2716 u32 td_tag = 0; 2717 dma_addr_t dma; 2718 u16 gso_segs; 2719 u16 desc_count = 0; 2720 bool tail_bump = true; 2721 bool do_rs = false; 2722 2723 if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { 2724 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; 2725 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >> 2726 I40E_TX_FLAGS_VLAN_SHIFT; 2727 } 2728 2729 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) 2730 gso_segs = skb_shinfo(skb)->gso_segs; 2731 else 2732 gso_segs = 1; 2733 2734 /* multiply data chunks by size of headers */ 2735 first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len); 2736 
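/* bytecount is the on-wire byte count fed to BQL via netdev_tx_sent_queue() below; for TSO each segment carries its own copy of the headers, so hdr_len is counted once per gso segment */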
first->gso_segs = gso_segs; 2737 first->skb = skb; 2738 first->tx_flags = tx_flags; 2739 2740 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); 2741 2742 tx_desc = I40E_TX_DESC(tx_ring, i); 2743 tx_bi = first; 2744 2745 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 2746 unsigned int max_data = I40E_MAX_DATA_PER_TXD_ALIGNED; 2747 2748 if (dma_mapping_error(tx_ring->dev, dma)) 2749 goto dma_error; 2750 2751 /* record length, and DMA address */ 2752 dma_unmap_len_set(tx_bi, len, size); 2753 dma_unmap_addr_set(tx_bi, dma, dma); 2754 2755 /* align size to end of page */ 2756 max_data += -dma & (I40E_MAX_READ_REQ_SIZE - 1); 2757 tx_desc->buffer_addr = cpu_to_le64(dma); 2758 2759 while (unlikely(size > I40E_MAX_DATA_PER_TXD)) { 2760 tx_desc->cmd_type_offset_bsz = 2761 build_ctob(td_cmd, td_offset, 2762 max_data, td_tag); 2763 2764 tx_desc++; 2765 i++; 2766 desc_count++; 2767 2768 if (i == tx_ring->count) { 2769 tx_desc = I40E_TX_DESC(tx_ring, 0); 2770 i = 0; 2771 } 2772 2773 dma += max_data; 2774 size -= max_data; 2775 2776 max_data = I40E_MAX_DATA_PER_TXD_ALIGNED; 2777 tx_desc->buffer_addr = cpu_to_le64(dma); 2778 } 2779 2780 if (likely(!data_len)) 2781 break; 2782 2783 tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, 2784 size, td_tag); 2785 2786 tx_desc++; 2787 i++; 2788 desc_count++; 2789 2790 if (i == tx_ring->count) { 2791 tx_desc = I40E_TX_DESC(tx_ring, 0); 2792 i = 0; 2793 } 2794 2795 size = skb_frag_size(frag); 2796 data_len -= size; 2797 2798 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, 2799 DMA_TO_DEVICE); 2800 2801 tx_bi = &tx_ring->tx_bi[i]; 2802 } 2803 2804 /* set next_to_watch value indicating a packet is present */ 2805 first->next_to_watch = tx_desc; 2806 2807 i++; 2808 if (i == tx_ring->count) 2809 i = 0; 2810 2811 tx_ring->next_to_use = i; 2812 2813 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); 2814 i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); 2815 2816 /* Algorithm to optimize tail and RS bit setting: 2817 * if xmit_more is supported 2818 * if xmit_more is true 2819 * do not update tail and do not mark RS bit. 2820 * if xmit_more is false and last xmit_more was false 2821 * if every packet spanned less than 4 desc 2822 * then set RS bit on 4th packet and update tail 2823 * on every packet 2824 * else 2825 * update tail and set RS bit on every packet. 2826 * if xmit_more is false and last_xmit_more was true 2827 * update tail and set RS bit. 2828 * 2829 * Optimization: wmb to be issued only in case of tail update. 2830 * Also optimize the Descriptor WB path for RS bit with the same 2831 * algorithm. 2832 * 2833 * Note: If there are less than 4 packets 2834 * pending and interrupts were disabled the service task will 2835 * trigger a force WB. 2836 */ 2837 if (skb->xmit_more && 2838 !netif_xmit_stopped(txring_txq(tx_ring))) { 2839 tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; 2840 tail_bump = false; 2841 } else if (!skb->xmit_more && 2842 !netif_xmit_stopped(txring_txq(tx_ring)) && 2843 (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && 2844 (tx_ring->packet_stride < WB_STRIDE) && 2845 (desc_count < WB_STRIDE)) { 2846 tx_ring->packet_stride++; 2847 } else { 2848 tx_ring->packet_stride = 0; 2849 tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; 2850 do_rs = true; 2851 } 2852 if (do_rs) 2853 tx_ring->packet_stride = 0; 2854 2855 tx_desc->cmd_type_offset_bsz = 2856 build_ctob(td_cmd, td_offset, size, td_tag) | 2857 cpu_to_le64((u64)(do_rs ? 
I40E_TXD_CMD : 2858 I40E_TX_DESC_CMD_EOP) << 2859 I40E_TXD_QW1_CMD_SHIFT); 2860 2861 /* notify HW of packet */ 2862 if (!tail_bump) { 2863 prefetchw(tx_desc + 1); 2864 } else { 2865 /* Force memory writes to complete before letting h/w 2866 * know there are new descriptors to fetch. (Only 2867 * applicable for weak-ordered memory model archs, 2868 * such as IA-64). 2869 */ 2870 wmb(); 2871 writel(i, tx_ring->tail); 2872 } 2873 return; 2874 2875 dma_error: 2876 dev_info(tx_ring->dev, "TX DMA map failed\n"); 2877 2878 /* clear dma mappings for failed tx_bi map */ 2879 for (;;) { 2880 tx_bi = &tx_ring->tx_bi[i]; 2881 i40e_unmap_and_free_tx_resource(tx_ring, tx_bi); 2882 if (tx_bi == first) 2883 break; 2884 if (i == 0) 2885 i = tx_ring->count; 2886 i--; 2887 } 2888 2889 tx_ring->next_to_use = i; 2890 } 2891 2892 /** 2893 * i40e_xmit_frame_ring - Sends buffer on Tx ring 2894 * @skb: send buffer 2895 * @tx_ring: ring to send buffer on 2896 * 2897 * Returns NETDEV_TX_OK if sent, else an error code 2898 **/ 2899 static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, 2900 struct i40e_ring *tx_ring) 2901 { 2902 u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT; 2903 u32 cd_tunneling = 0, cd_l2tag2 = 0; 2904 struct i40e_tx_buffer *first; 2905 u32 td_offset = 0; 2906 u32 tx_flags = 0; 2907 __be16 protocol; 2908 u32 td_cmd = 0; 2909 u8 hdr_len = 0; 2910 int tso, count; 2911 int tsyn; 2912 2913 /* prefetch the data, we'll need it later */ 2914 prefetch(skb->data); 2915 2916 count = i40e_xmit_descriptor_count(skb); 2917 if (i40e_chk_linearize(skb, count)) { 2918 if (__skb_linearize(skb)) 2919 goto out_drop; 2920 count = i40e_txd_use_count(skb->len); 2921 tx_ring->tx_stats.tx_linearize++; 2922 } 2923 2924 /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, 2925 * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, 2926 * + 4 desc gap to avoid the cache line where head is, 2927 * + 1 desc for context descriptor, 2928 * otherwise try next time 2929 */ 2930 if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { 2931 tx_ring->tx_stats.tx_busy++; 2932 return NETDEV_TX_BUSY; 2933 } 2934 2935 /* prepare the xmit flags */ 2936 if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) 2937 goto out_drop; 2938 2939 /* obtain protocol of skb */ 2940 protocol = vlan_get_protocol(skb); 2941 2942 /* record the location of the first descriptor for this packet */ 2943 first = &tx_ring->tx_bi[tx_ring->next_to_use]; 2944 2945 /* setup IPv4/IPv6 offloads */ 2946 if (protocol == htons(ETH_P_IP)) 2947 tx_flags |= I40E_TX_FLAGS_IPV4; 2948 else if (protocol == htons(ETH_P_IPV6)) 2949 tx_flags |= I40E_TX_FLAGS_IPV6; 2950 2951 tso = i40e_tso(skb, &hdr_len, &cd_type_cmd_tso_mss); 2952 2953 if (tso < 0) 2954 goto out_drop; 2955 else if (tso) 2956 tx_flags |= I40E_TX_FLAGS_TSO; 2957 2958 /* Always offload the checksum, since it's in the data descriptor */ 2959 tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, 2960 tx_ring, &cd_tunneling); 2961 if (tso < 0) 2962 goto out_drop; 2963 2964 tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss); 2965 2966 if (tsyn) 2967 tx_flags |= I40E_TX_FLAGS_TSYN; 2968 2969 skb_tx_timestamp(skb); 2970 2971 /* always enable CRC insertion offload */ 2972 td_cmd |= I40E_TX_DESC_CMD_ICRC; 2973 2974 i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, 2975 cd_tunneling, cd_l2tag2); 2976 2977 /* Add Flow Director ATR if it's enabled. 2978 * 2979 * NOTE: this must always be directly before the data descriptor. 
2980 */ 2981 i40e_atr(tx_ring, skb, tx_flags); 2982 2983 i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len, 2984 td_cmd, td_offset); 2985 2986 return NETDEV_TX_OK; 2987 2988 out_drop: 2989 dev_kfree_skb_any(skb); 2990 return NETDEV_TX_OK; 2991 } 2992 2993 /** 2994 * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer 2995 * @skb: send buffer 2996 * @netdev: network interface device structure 2997 * 2998 * Returns NETDEV_TX_OK if sent, else an error code 2999 **/ 3000 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev) 3001 { 3002 struct i40e_netdev_priv *np = netdev_priv(netdev); 3003 struct i40e_vsi *vsi = np->vsi; 3004 struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping]; 3005 3006 /* hardware can't handle really short frames, hardware padding works 3007 * beyond this point 3008 */ 3009 if (skb_put_padto(skb, I40E_MIN_TX_LEN)) 3010 return NETDEV_TX_OK; 3011 3012 return i40e_xmit_frame_ring(skb, tx_ring); 3013 } 3014