// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018, Intel Corporation. */

/* Intel(R) Ethernet Connection E800 Series Linux Driver */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <generated/utsrelease.h>
#include "ice.h"
#include "ice_base.h"
#include "ice_lib.h"
#include "ice_fltr.h"
#include "ice_dcb_lib.h"
#include "ice_dcb_nl.h"
#include "ice_devlink.h"
/* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the
 * ice tracepoint functions. This must be done exactly once across the
 * ice driver.
 */
#define CREATE_TRACE_POINTS
#include "ice_trace.h"
#include "ice_eswitch.h"
#include "ice_tc_lib.h"
#include "ice_vsi_vlan_ops.h"

#define DRV_SUMMARY	"Intel(R) Ethernet Connection E800 Series Linux Driver"
static const char ice_driver_string[] = DRV_SUMMARY;
static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";

/* DDP Package file located in firmware search paths (e.g. /lib/firmware/) */
#define ICE_DDP_PKG_PATH	"intel/ice/ddp/"
#define ICE_DDP_PKG_FILE	ICE_DDP_PKG_PATH "ice.pkg"

MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
MODULE_DESCRIPTION(DRV_SUMMARY);
MODULE_LICENSE("GPL v2");
MODULE_FIRMWARE(ICE_DDP_PKG_FILE);

static int debug = -1;
module_param(debug, int, 0644);
#ifndef CONFIG_DYNAMIC_DEBUG
MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)");
#else
MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
#endif /* !CONFIG_DYNAMIC_DEBUG */

static DEFINE_IDA(ice_aux_ida);
DEFINE_STATIC_KEY_FALSE(ice_xdp_locking_key);
EXPORT_SYMBOL(ice_xdp_locking_key);

/**
 * ice_hw_to_dev - Get device pointer from the hardware structure
 * @hw: pointer to the device HW structure
 *
 * Used to access the device pointer from compilation units which can't easily
 * include the definition of struct ice_pf without leading to circular header
 * dependencies.
 */
struct device *ice_hw_to_dev(struct ice_hw *hw)
{
	struct ice_pf *pf = container_of(hw, struct ice_pf, hw);

	return &pf->pdev->dev;
}

static struct workqueue_struct *ice_wq;
static const struct net_device_ops ice_netdev_safe_mode_ops;
static const struct net_device_ops ice_netdev_ops;

static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);

static void ice_vsi_release_all(struct ice_pf *pf);

static int ice_rebuild_channels(struct ice_pf *pf);
static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_adv_fltr);

static int
ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch,
		     void *cb_priv, enum tc_setup_type type, void *type_data,
		     void *data,
		     void (*cleanup)(struct flow_block_cb *block_cb));

bool netif_is_ice(struct net_device *dev)
{
	return dev && (dev->netdev_ops == &ice_netdev_ops);
}

/**
 * ice_get_tx_pending - returns number of Tx descriptors not processed
 * @ring: the ring of descriptors
 */
static u16 ice_get_tx_pending(struct ice_tx_ring *ring)
{
	u16 head, tail;

	head = ring->next_to_clean;
	tail = ring->next_to_use;

	if (head != tail)
		return (head < tail) ?
			tail - head : (tail + ring->count - head);
	return 0;
}

/**
 * ice_check_for_hang_subtask - check for and recover hung queues
 * @pf: pointer to PF struct
 */
static void ice_check_for_hang_subtask(struct ice_pf *pf)
{
	struct ice_vsi *vsi = NULL;
	struct ice_hw *hw;
	unsigned int i;
	int packets;
	u32 v;

	ice_for_each_vsi(pf, v)
		if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) {
			vsi = pf->vsi[v];
			break;
		}

	if (!vsi || test_bit(ICE_VSI_DOWN, vsi->state))
		return;

	if (!(vsi->netdev && netif_carrier_ok(vsi->netdev)))
		return;

	hw = &vsi->back->hw;

	ice_for_each_txq(vsi, i) {
		struct ice_tx_ring *tx_ring = vsi->tx_rings[i];

		if (!tx_ring)
			continue;
		if (ice_ring_ch_enabled(tx_ring))
			continue;

		if (tx_ring->desc) {
			/* If packet counter has not changed the queue is
			 * likely stalled, so force an interrupt for this
			 * queue.
			 *
			 * prev_pkt would be negative if there was no
			 * pending work.
			 */
			packets = tx_ring->stats.pkts & INT_MAX;
			if (tx_ring->tx_stats.prev_pkt == packets) {
				/* Trigger sw interrupt to revive the queue */
				ice_trigger_sw_intr(hw, tx_ring->q_vector);
				continue;
			}

			/* Memory barrier between read of packet count and call
			 * to ice_get_tx_pending()
			 */
			smp_rmb();
			tx_ring->tx_stats.prev_pkt =
				ice_get_tx_pending(tx_ring) ? packets : -1;
		}
	}
}

/**
 * ice_init_mac_fltr - Set initial MAC filters
 * @pf: board private structure
 *
 * Set initial set of MAC filters for PF VSI; configure filters for permanent
 * address and broadcast address. If an error is encountered, netdevice will be
 * unregistered.
 */
static int ice_init_mac_fltr(struct ice_pf *pf)
{
	struct ice_vsi *vsi;
	u8 *perm_addr;

	vsi = ice_get_main_vsi(pf);
	if (!vsi)
		return -EINVAL;

	perm_addr = vsi->port_info->mac.perm_addr;
	return ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI);
}

/**
 * ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
 * @netdev: the net device on which the sync is happening
 * @addr: MAC address to sync
 *
 * This is a callback function which is called by the in kernel device sync
 * functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only
 * populates the tmp_sync_list, which is later used by ice_add_mac to add the
 * MAC filters to the hardware.
 */
static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = np->vsi;

	if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr,
				     ICE_FWD_TO_VSI))
		return -EINVAL;

	return 0;
}

/**
 * ice_add_mac_to_unsync_list - creates list of MAC addresses to be unsynced
 * @netdev: the net device on which the unsync is happening
 * @addr: MAC address to unsync
 *
 * This is a callback function which is called by the in kernel device unsync
 * functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only
 * populates the tmp_unsync_list, which is later used by ice_remove_mac to
 * delete the MAC filters from the hardware.
 */
static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = np->vsi;

	/* Under some circumstances, we might receive a request to delete our
	 * own device address from our uc list. Because we store the device
	 * address in the VSI's MAC filter list, we need to ignore such
	 * requests and not delete our device address from this list.
	 */
	if (ether_addr_equal(addr, netdev->dev_addr))
		return 0;

	if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr,
				     ICE_FWD_TO_VSI))
		return -EINVAL;

	return 0;
}
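
/* These two callbacks are handed to __dev_uc_sync()/__dev_mc_sync() from
 * ice_vsi_sync_fltr() below, so they only stage addresses on
 * vsi->tmp_sync_list/vsi->tmp_unsync_list; the actual filter programming is
 * done afterwards via ice_fltr_add_mac_list()/ice_fltr_remove_mac_list().
 */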

/**
 * ice_vsi_fltr_changed - check if filter state changed
 * @vsi: VSI to be checked
 *
 * returns true if filter state has changed, false otherwise.
 */
static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
{
	return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) ||
	       test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state) ||
	       test_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
}

/**
 * ice_set_promisc - Enable promiscuous mode for a given PF
 * @vsi: the VSI being configured
 * @promisc_m: mask of promiscuous config bits
 *
 */
static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m)
{
	int status;

	if (vsi->type != ICE_VSI_PF)
		return 0;

	if (ice_vsi_has_non_zero_vlans(vsi))
		status = ice_fltr_set_vlan_vsi_promisc(&vsi->back->hw, vsi, promisc_m);
	else
		status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, 0);
	return status;
}

/**
 * ice_clear_promisc - Disable promiscuous mode for a given PF
 * @vsi: the VSI being configured
 * @promisc_m: mask of promiscuous config bits
 *
 */
static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m)
{
	int status;

	if (vsi->type != ICE_VSI_PF)
		return 0;

	if (ice_vsi_has_non_zero_vlans(vsi))
		status = ice_fltr_clear_vlan_vsi_promisc(&vsi->back->hw, vsi, promisc_m);
	else
		status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, 0);
	return status;
}

/**
 * ice_vsi_sync_fltr - Update the VSI filter list to the HW
 * @vsi: ptr to the VSI
 *
 * Push any outstanding VSI filter changes through the AdminQ.
 */
static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
{
	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
	struct device *dev = ice_pf_to_dev(vsi->back);
	struct net_device *netdev = vsi->netdev;
	bool promisc_forced_on = false;
	struct ice_pf *pf = vsi->back;
	struct ice_hw *hw = &pf->hw;
	u32 changed_flags = 0;
	u8 promisc_m;
	int err;

	if (!vsi->netdev)
		return -EINVAL;

	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
		usleep_range(1000, 2000);

	changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
	vsi->current_netdev_flags = vsi->netdev->flags;

	INIT_LIST_HEAD(&vsi->tmp_sync_list);
	INIT_LIST_HEAD(&vsi->tmp_unsync_list);

	if (ice_vsi_fltr_changed(vsi)) {
		clear_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
		clear_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
		clear_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);

		/* grab the netdev's addr_list_lock */
		netif_addr_lock_bh(netdev);
		__dev_uc_sync(netdev, ice_add_mac_to_sync_list,
			      ice_add_mac_to_unsync_list);
		__dev_mc_sync(netdev, ice_add_mac_to_sync_list,
			      ice_add_mac_to_unsync_list);
		/* our temp lists are populated. release lock */
		netif_addr_unlock_bh(netdev);
	}

	/* Remove MAC addresses in the unsync list */
	err = ice_fltr_remove_mac_list(vsi, &vsi->tmp_unsync_list);
	ice_fltr_free_list(dev, &vsi->tmp_unsync_list);
	if (err) {
		netdev_err(netdev, "Failed to delete MAC filters\n");
		/* if we failed because of alloc failures, just bail */
		if (err == -ENOMEM)
			goto out;
	}

	/* Add MAC addresses in the sync list */
	err = ice_fltr_add_mac_list(vsi, &vsi->tmp_sync_list);
	ice_fltr_free_list(dev, &vsi->tmp_sync_list);
	/* If filter is added successfully or already exists, do not go into
	 * 'if' condition and report it as error. Instead continue processing
	 * rest of the function.
	 */
	if (err && err != -EEXIST) {
		netdev_err(netdev, "Failed to add MAC filters\n");
		/* If there is no more space for new umac filters, VSI
		 * should go into promiscuous mode. There should be some
		 * space reserved for promiscuous filters.
		 */
		if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC &&
		    !test_and_set_bit(ICE_FLTR_OVERFLOW_PROMISC,
				      vsi->state)) {
			promisc_forced_on = true;
			netdev_warn(netdev, "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
				    vsi->vsi_num);
		} else {
			goto out;
		}
	}
	err = 0;
	/* check for changes in promiscuous modes */
	if (changed_flags & IFF_ALLMULTI) {
		if (vsi->current_netdev_flags & IFF_ALLMULTI) {
			if (ice_vsi_has_non_zero_vlans(vsi))
				promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
			else
				promisc_m = ICE_MCAST_PROMISC_BITS;

			err = ice_set_promisc(vsi, promisc_m);
			if (err) {
				netdev_err(netdev, "Error setting Multicast promiscuous mode on VSI %i\n",
					   vsi->vsi_num);
				vsi->current_netdev_flags &= ~IFF_ALLMULTI;
				goto out_promisc;
			}
		} else {
			/* !(vsi->current_netdev_flags & IFF_ALLMULTI) */
			if (ice_vsi_has_non_zero_vlans(vsi))
				promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
			else
				promisc_m = ICE_MCAST_PROMISC_BITS;

			err = ice_clear_promisc(vsi, promisc_m);
			if (err) {
				netdev_err(netdev, "Error clearing Multicast promiscuous mode on VSI %i\n",
					   vsi->vsi_num);
				vsi->current_netdev_flags |= IFF_ALLMULTI;
				goto out_promisc;
			}
		}
	}

	if (((changed_flags & IFF_PROMISC) || promisc_forced_on) ||
	    test_bit(ICE_VSI_PROMISC_CHANGED, vsi->state)) {
		clear_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
		if (vsi->current_netdev_flags & IFF_PROMISC) {
			/* Apply Rx filter rule to get traffic from wire */
			if (!ice_is_dflt_vsi_in_use(pf->first_sw)) {
				err = ice_set_dflt_vsi(pf->first_sw, vsi);
				if (err && err != -EEXIST) {
					netdev_err(netdev, "Error %d setting default VSI %i Rx rule\n",
						   err, vsi->vsi_num);
					vsi->current_netdev_flags &=
						~IFF_PROMISC;
					goto out_promisc;
				}
				err = 0;
				vlan_ops->dis_rx_filtering(vsi);
			}
		} else {
			/* Clear Rx filter to remove traffic from wire */
			if (ice_is_vsi_dflt_vsi(pf->first_sw, vsi)) {
				err = ice_clear_dflt_vsi(pf->first_sw);
				if (err) {
					netdev_err(netdev, "Error %d clearing default VSI %i Rx rule\n",
						   err, vsi->vsi_num);
					vsi->current_netdev_flags |=
						IFF_PROMISC;
					goto out_promisc;
				}
				if (vsi->current_netdev_flags &
				    NETIF_F_HW_VLAN_CTAG_FILTER)
					vlan_ops->ena_rx_filtering(vsi);
			}
		}
	}
	goto exit;

out_promisc:
	set_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
	goto exit;
out:
	/* if something went wrong then set the changed flag so we try again */
	set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
	set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
exit:
	clear_bit(ICE_CFG_BUSY, vsi->state);
	return err;
}

/**
 * ice_sync_fltr_subtask - Sync the VSI filter list with HW
 * @pf: board private structure
 */
static void ice_sync_fltr_subtask(struct ice_pf *pf)
{
	int v;

	if (!pf || !(test_bit(ICE_FLAG_FLTR_SYNC, pf->flags)))
		return;

	clear_bit(ICE_FLAG_FLTR_SYNC, pf->flags);

	ice_for_each_vsi(pf, v)
		if (pf->vsi[v] && ice_vsi_fltr_changed(pf->vsi[v]) &&
		    ice_vsi_sync_fltr(pf->vsi[v])) {
			/* come back and try again later */
			set_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
			break;
		}
}
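
/* ICE_FLAG_FLTR_SYNC is set when the netdev filter lists change (see
 * ice_set_rx_mode() later in this file); the service task then runs
 * ice_sync_fltr_subtask() above to push the pending changes to hardware.
 */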

/**
 * ice_pf_dis_all_vsi - Pause all VSIs on a PF
 * @pf: the PF
 * @locked: is the rtnl_lock already held
 */
static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
{
	int node;
	int v;

	ice_for_each_vsi(pf, v)
		if (pf->vsi[v])
			ice_dis_vsi(pf->vsi[v], locked);

	for (node = 0; node < ICE_MAX_PF_AGG_NODES; node++)
		pf->pf_agg_node[node].num_vsis = 0;

	for (node = 0; node < ICE_MAX_VF_AGG_NODES; node++)
		pf->vf_agg_node[node].num_vsis = 0;
}

/**
 * ice_clear_sw_switch_recipes - clear switch recipes
 * @pf: board private structure
 *
 * Mark switch recipes as not created in sw structures. There are cases where
 * rules (especially advanced rules) need to be restored, either re-read from
 * hardware or added again. For example after the reset. The 'recp_created'
 * flag prevents that from happening and needs to be cleared upfront.
 */
static void ice_clear_sw_switch_recipes(struct ice_pf *pf)
{
	struct ice_sw_recipe *recp;
	u8 i;

	recp = pf->hw.switch_info->recp_list;
	for (i = 0; i < ICE_MAX_NUM_RECIPES; i++)
		recp[i].recp_created = false;
}

/**
 * ice_prepare_for_reset - prep for reset
 * @pf: board private structure
 * @reset_type: reset type requested
 *
 * Inform or close all dependent features in prep for reset.
 */
static void
ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
{
	struct ice_hw *hw = &pf->hw;
	struct ice_vsi *vsi;
	struct ice_vf *vf;
	unsigned int bkt;

	dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type);

	/* already prepared for reset */
	if (test_bit(ICE_PREPARED_FOR_RESET, pf->state))
		return;

	ice_unplug_aux_dev(pf);

	/* Notify VFs of impending reset */
	if (ice_check_sq_alive(hw, &hw->mailboxq))
		ice_vc_notify_reset(pf);

	/* Disable VFs until reset is completed */
	mutex_lock(&pf->vfs.table_lock);
	ice_for_each_vf(pf, bkt, vf)
		ice_set_vf_state_qs_dis(vf);
	mutex_unlock(&pf->vfs.table_lock);

	if (ice_is_eswitch_mode_switchdev(pf)) {
		if (reset_type != ICE_RESET_PFR)
			ice_clear_sw_switch_recipes(pf);
	}

	/* release ADQ specific HW and SW resources */
	vsi = ice_get_main_vsi(pf);
	if (!vsi)
		goto skip;

	/* to be on safe side, reset orig_rss_size so that normal flow
	 * of deciding rss_size can take precedence
	 */
	vsi->orig_rss_size = 0;

	if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
		if (reset_type == ICE_RESET_PFR) {
			vsi->old_ena_tc = vsi->all_enatc;
			vsi->old_numtc = vsi->all_numtc;
		} else {
			ice_remove_q_channels(vsi, true);

			/* for other reset type, do not support channel rebuild
			 * hence reset needed info
			 */
			vsi->old_ena_tc = 0;
			vsi->all_enatc = 0;
			vsi->old_numtc = 0;
			vsi->all_numtc = 0;
			vsi->req_txq = 0;
			vsi->req_rxq = 0;
			clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
			memset(&vsi->mqprio_qopt, 0, sizeof(vsi->mqprio_qopt));
		}
	}
skip:

	/* clear SW filtering DB */
	ice_clear_hw_tbls(hw);
	/* disable the VSIs and their queues that are not already DOWN */
	ice_pf_dis_all_vsi(pf, false);

	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
		ice_ptp_prepare_for_reset(pf);

	if (ice_is_feature_supported(pf, ICE_F_GNSS))
		ice_gnss_exit(pf);

	if (hw->port_info)
		ice_sched_clear_port(hw->port_info);

	ice_shutdown_all_ctrlq(hw);

	set_bit(ICE_PREPARED_FOR_RESET, pf->state);
}
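
/* The ICE_PREPARED_FOR_RESET bit set above is cleared again from
 * ice_do_reset() and ice_reset_subtask() below once the rebuild has finished
 * (or the reset failed), so a subsequent reset can prepare again.
 */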

/**
 * ice_do_reset - Initiate one of many types of resets
 * @pf: board private structure
 * @reset_type: reset type requested before this function was called.
 */
static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_hw *hw = &pf->hw;

	dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);

	ice_prepare_for_reset(pf, reset_type);

	/* trigger the reset */
	if (ice_reset(hw, reset_type)) {
		dev_err(dev, "reset %d failed\n", reset_type);
		set_bit(ICE_RESET_FAILED, pf->state);
		clear_bit(ICE_RESET_OICR_RECV, pf->state);
		clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
		clear_bit(ICE_PFR_REQ, pf->state);
		clear_bit(ICE_CORER_REQ, pf->state);
		clear_bit(ICE_GLOBR_REQ, pf->state);
		wake_up(&pf->reset_wait_queue);
		return;
	}

	/* PFR is a bit of a special case because it doesn't result in an OICR
	 * interrupt. So for PFR, rebuild after the reset and clear the reset-
	 * associated state bits.
	 */
	if (reset_type == ICE_RESET_PFR) {
		pf->pfr_count++;
		ice_rebuild(pf, reset_type);
		clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
		clear_bit(ICE_PFR_REQ, pf->state);
		wake_up(&pf->reset_wait_queue);
		ice_reset_all_vfs(pf);
	}
}

/**
 * ice_reset_subtask - Set up for resetting the device and driver
 * @pf: board private structure
 */
static void ice_reset_subtask(struct ice_pf *pf)
{
	enum ice_reset_req reset_type = ICE_RESET_INVAL;

	/* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an
	 * OICR interrupt. The OICR handler (ice_misc_intr) determines what type
	 * of reset is pending and sets bits in pf->state indicating the reset
	 * type and ICE_RESET_OICR_RECV. So, if the latter bit is set
	 * prepare for pending reset if not already (for PF software-initiated
	 * global resets the software should already be prepared for it as
	 * indicated by ICE_PREPARED_FOR_RESET; for global resets initiated
	 * by firmware or software on other PFs, that bit is not set so prepare
	 * for the reset now), poll for reset done, rebuild and return.
	 */
	if (test_bit(ICE_RESET_OICR_RECV, pf->state)) {
		/* Perform the largest reset requested */
		if (test_and_clear_bit(ICE_CORER_RECV, pf->state))
			reset_type = ICE_RESET_CORER;
		if (test_and_clear_bit(ICE_GLOBR_RECV, pf->state))
			reset_type = ICE_RESET_GLOBR;
		if (test_and_clear_bit(ICE_EMPR_RECV, pf->state))
			reset_type = ICE_RESET_EMPR;
		/* return if no valid reset type requested */
		if (reset_type == ICE_RESET_INVAL)
			return;
		ice_prepare_for_reset(pf, reset_type);

		/* make sure we are ready to rebuild */
		if (ice_check_reset(&pf->hw)) {
			set_bit(ICE_RESET_FAILED, pf->state);
		} else {
			/* done with reset. start rebuild */
			pf->hw.reset_ongoing = false;
			ice_rebuild(pf, reset_type);
			/* clear bit to resume normal operations, but
			 * ICE_NEEDS_RESTART bit is set in case rebuild failed
			 */
			clear_bit(ICE_RESET_OICR_RECV, pf->state);
			clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
			clear_bit(ICE_PFR_REQ, pf->state);
			clear_bit(ICE_CORER_REQ, pf->state);
			clear_bit(ICE_GLOBR_REQ, pf->state);
			wake_up(&pf->reset_wait_queue);
			ice_reset_all_vfs(pf);
		}

		return;
	}

	/* No pending resets to finish processing. Check for new resets */
	if (test_bit(ICE_PFR_REQ, pf->state))
		reset_type = ICE_RESET_PFR;
	if (test_bit(ICE_CORER_REQ, pf->state))
		reset_type = ICE_RESET_CORER;
	if (test_bit(ICE_GLOBR_REQ, pf->state))
		reset_type = ICE_RESET_GLOBR;
	/* If no valid reset type requested just return */
	if (reset_type == ICE_RESET_INVAL)
		return;

	/* reset if not already down or busy */
	if (!test_bit(ICE_DOWN, pf->state) &&
	    !test_bit(ICE_CFG_BUSY, pf->state)) {
		ice_do_reset(pf, reset_type);
	}
}

/**
 * ice_print_topo_conflict - print topology conflict message
 * @vsi: the VSI whose topology status is being checked
 */
static void ice_print_topo_conflict(struct ice_vsi *vsi)
{
	switch (vsi->port_info->phy.link_info.topo_media_conflict) {
	case ICE_AQ_LINK_TOPO_CONFLICT:
	case ICE_AQ_LINK_MEDIA_CONFLICT:
	case ICE_AQ_LINK_TOPO_UNREACH_PRT:
	case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT:
	case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA:
		netdev_info(vsi->netdev, "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n");
		break;
	case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA:
		if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, vsi->back->flags))
			netdev_warn(vsi->netdev, "An unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules\n");
		else
			netdev_err(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
		break;
	default:
		break;
	}
}

/**
 * ice_print_link_msg - print link up or down message
 * @vsi: the VSI whose link status is being queried
 * @isup: boolean for if the link is now up or down
 */
void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
{
	struct ice_aqc_get_phy_caps_data *caps;
	const char *an_advertised;
	const char *fec_req;
	const char *speed;
	const char *fec;
	const char *fc;
	const char *an;
	int status;

	if (!vsi)
		return;

	if (vsi->current_isup == isup)
		return;

	vsi->current_isup = isup;

	if (!isup) {
		netdev_info(vsi->netdev, "NIC Link is Down\n");
		return;
	}

	switch (vsi->port_info->phy.link_info.link_speed) {
	case ICE_AQ_LINK_SPEED_100GB:
		speed = "100 G";
		break;
	case ICE_AQ_LINK_SPEED_50GB:
		speed = "50 G";
		break;
	case ICE_AQ_LINK_SPEED_40GB:
		speed = "40 G";
		break;
	case ICE_AQ_LINK_SPEED_25GB:
		speed = "25 G";
		break;
	case ICE_AQ_LINK_SPEED_20GB:
		speed = "20 G";
		break;
	case ICE_AQ_LINK_SPEED_10GB:
		speed = "10 G";
		break;
	case ICE_AQ_LINK_SPEED_5GB:
		speed = "5 G";
		break;
	case ICE_AQ_LINK_SPEED_2500MB:
		speed = "2.5 G";
		break;
	case ICE_AQ_LINK_SPEED_1000MB:
		speed = "1 G";
		break;
	case ICE_AQ_LINK_SPEED_100MB:
		speed = "100 M";
		break;
	default:
		speed = "Unknown ";
		break;
	}

	switch (vsi->port_info->fc.current_mode) {
	case ICE_FC_FULL:
		fc = "Rx/Tx";
		break;
	case ICE_FC_TX_PAUSE:
		fc = "Tx";
		break;
	case ICE_FC_RX_PAUSE:
		fc = "Rx";
		break;
	case ICE_FC_NONE:
		fc = "None";
		break;
	default:
		fc = "Unknown";
		break;
	}

	/* Get FEC mode based on negotiated link info */
	switch (vsi->port_info->phy.link_info.fec_info) {
	case ICE_AQ_LINK_25G_RS_528_FEC_EN:
	case ICE_AQ_LINK_25G_RS_544_FEC_EN:
		fec = "RS-FEC";
		break;
	case ICE_AQ_LINK_25G_KR_FEC_EN:
		fec = "FC-FEC/BASE-R";
		break;
	default:
		fec = "NONE";
		break;
	}

	/* check if autoneg completed, might be false due to not supported */
	if (vsi->port_info->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
		an = "True";
	else
		an = "False";

	/* Get FEC mode requested based on PHY caps last SW configuration */
	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
	if (!caps) {
		fec_req = "Unknown";
		an_advertised = "Unknown";
		goto done;
	}

	status = ice_aq_get_phy_caps(vsi->port_info, false,
				     ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
	if (status)
		netdev_info(vsi->netdev, "Get phy capability failed.\n");

	an_advertised = ice_is_phy_caps_an_enabled(caps) ? "On" : "Off";

	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
		fec_req = "RS-FEC";
	else if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
		 caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
		fec_req = "FC-FEC/BASE-R";
	else
		fec_req = "NONE";

	kfree(caps);

done:
	netdev_info(vsi->netdev, "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg Advertised: %s, Autoneg Negotiated: %s, Flow Control: %s\n",
		    speed, fec_req, fec, an_advertised, an, fc);
	ice_print_topo_conflict(vsi);
}

/**
 * ice_vsi_link_event - update the VSI's netdev
 * @vsi: the VSI on which the link event occurred
 * @link_up: whether or not the VSI needs to be set up or down
 */
static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
{
	if (!vsi)
		return;

	if (test_bit(ICE_VSI_DOWN, vsi->state) || !vsi->netdev)
		return;

	if (vsi->type == ICE_VSI_PF) {
		if (link_up == netif_carrier_ok(vsi->netdev))
			return;

		if (link_up) {
			netif_carrier_on(vsi->netdev);
			netif_tx_wake_all_queues(vsi->netdev);
		} else {
			netif_carrier_off(vsi->netdev);
			netif_tx_stop_all_queues(vsi->netdev);
		}
	}
}

/**
 * ice_set_dflt_mib - send a default config MIB to the FW
 * @pf: private PF struct
 *
 * This function sends a default configuration MIB to the FW.
 *
 * If this function errors out at any point, the driver is still able to
 * function. The main impact is that LFC may not operate as expected.
 * Therefore an error state in this function should be treated with a DBG
 * message and continue on with driver rebuild/reenable.
 */
static void ice_set_dflt_mib(struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);
	u8 mib_type, *buf, *lldpmib = NULL;
	u16 len, typelen, offset = 0;
	struct ice_lldp_org_tlv *tlv;
	struct ice_hw *hw = &pf->hw;
	u32 ouisubtype;

	mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
	lldpmib = kzalloc(ICE_LLDPDU_SIZE, GFP_KERNEL);
	if (!lldpmib) {
		dev_dbg(dev, "%s Failed to allocate MIB memory\n",
			__func__);
		return;
	}

	/* Add ETS CFG TLV */
	tlv = (struct ice_lldp_org_tlv *)lldpmib;
	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
		   ICE_IEEE_ETS_TLV_LEN);
	tlv->typelen = htons(typelen);
	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
		      ICE_IEEE_SUBTYPE_ETS_CFG);
	tlv->ouisubtype = htonl(ouisubtype);

	buf = tlv->tlvinfo;
	buf[0] = 0;

	/* ETS CFG all UPs map to TC 0. Next 4 (1 - 4) Octets = 0.
	 * Octets 5 - 12 are BW values, set octet 5 to 100% BW.
	 * Octets 13 - 20 are TSA values - leave as zeros
	 */
	buf[5] = 0x64;
	len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
	offset += len + 2;
	tlv = (struct ice_lldp_org_tlv *)
		((char *)tlv + sizeof(tlv->typelen) + len);

	/* Add ETS REC TLV */
	buf = tlv->tlvinfo;
	tlv->typelen = htons(typelen);

	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
		      ICE_IEEE_SUBTYPE_ETS_REC);
	tlv->ouisubtype = htonl(ouisubtype);

	/* First octet of buf is reserved
	 * Octets 1 - 4 map UP to TC - all UPs map to zero
	 * Octets 5 - 12 are BW values - set TC 0 to 100%.
	 * Octets 13 - 20 are TSA value - leave as zeros
	 */
	buf[5] = 0x64;
	offset += len + 2;
	tlv = (struct ice_lldp_org_tlv *)
		((char *)tlv + sizeof(tlv->typelen) + len);

	/* Add PFC CFG TLV */
	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
		   ICE_IEEE_PFC_TLV_LEN);
	tlv->typelen = htons(typelen);

	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
		      ICE_IEEE_SUBTYPE_PFC_CFG);
	tlv->ouisubtype = htonl(ouisubtype);

	/* Octet 1 left as all zeros - PFC disabled */
	buf[0] = 0x08;
	len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
	offset += len + 2;

	if (ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, offset, NULL))
		dev_dbg(dev, "%s Failed to set default LLDP MIB\n", __func__);

	kfree(lldpmib);
}
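
/* The local MIB built above is laid out, in order, as:
 *	ETS CFG TLV  (802.1Qaz OUI, all UPs map to TC 0, TC 0 gets 100% BW)
 *	ETS REC TLV  (same recommendation)
 *	PFC CFG TLV  (no priorities enabled)
 * 'offset' accumulates sizeof(typelen) + TLV length for each TLV and becomes
 * the total LLDPDU length passed to ice_aq_set_lldp_mib().
 */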

/**
 * ice_check_phy_fw_load - check if PHY FW load failed
 * @pf: pointer to PF struct
 * @link_cfg_err: bitmap from the link info structure
 *
 * check if external PHY FW load failed and print an error message if it did
 */
static void ice_check_phy_fw_load(struct ice_pf *pf, u8 link_cfg_err)
{
	if (!(link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE)) {
		clear_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags);
		return;
	}

	if (test_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags))
		return;

	if (link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE) {
		dev_err(ice_pf_to_dev(pf), "Device failed to load the FW for the external PHY. Please download and install the latest NVM for your device and try again\n");
		set_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags);
	}
}

/**
 * ice_check_module_power
 * @pf: pointer to PF struct
 * @link_cfg_err: bitmap from the link info structure
 *
 * check module power level returned by a previous call to aq_get_link_info
 * and print error messages if module power level is not supported
 */
static void ice_check_module_power(struct ice_pf *pf, u8 link_cfg_err)
{
	/* if module power level is supported, clear the flag */
	if (!(link_cfg_err & (ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT |
			      ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED))) {
		clear_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
		return;
	}

	/* if ICE_FLAG_MOD_POWER_UNSUPPORTED was previously set and the
	 * above block didn't clear this bit, there's nothing to do
	 */
	if (test_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags))
		return;

	if (link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT) {
		dev_err(ice_pf_to_dev(pf), "The installed module is incompatible with the device's NVM image. Cannot start link\n");
		set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
	} else if (link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED) {
		dev_err(ice_pf_to_dev(pf), "The module's power requirements exceed the device's power supply. Cannot start link\n");
		set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
	}
}

/**
 * ice_check_link_cfg_err - check if link configuration failed
 * @pf: pointer to the PF struct
 * @link_cfg_err: bitmap from the link info structure
 *
 * print if any link configuration failure happens due to the value in the
 * link_cfg_err parameter in the link info structure
 */
static void ice_check_link_cfg_err(struct ice_pf *pf, u8 link_cfg_err)
{
	ice_check_module_power(pf, link_cfg_err);
	ice_check_phy_fw_load(pf, link_cfg_err);
}

/**
 * ice_link_event - process the link event
 * @pf: PF that the link event is associated with
 * @pi: port_info for the port that the link event is associated with
 * @link_up: true if the physical link is up and false if it is down
 * @link_speed: current link speed received from the link event
 *
 * Returns 0 on success and negative on failure
 */
static int
ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
	       u16 link_speed)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_phy_info *phy_info;
	struct ice_vsi *vsi;
	u16 old_link_speed;
	bool old_link;
	int status;

	phy_info = &pi->phy;
	phy_info->link_info_old = phy_info->link_info;

	old_link = !!(phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
	old_link_speed = phy_info->link_info_old.link_speed;

	/* update the link info structures and re-enable link events,
	 * don't bail on failure due to other bookkeeping needed
	 */
	status = ice_update_link_info(pi);
	if (status)
		dev_dbg(dev, "Failed to update link status on port %d, err %d aq_err %s\n",
			pi->lport, status,
			ice_aq_str(pi->hw->adminq.sq_last_status));

	ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);

	/* Check if the link state is up after updating link info, and treat
	 * this event as an UP event since the link is actually UP now.
	 */
	if (phy_info->link_info.link_info & ICE_AQ_LINK_UP)
		link_up = true;

	vsi = ice_get_main_vsi(pf);
	if (!vsi || !vsi->port_info)
		return -EINVAL;

	/* turn off PHY if media was removed */
	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) &&
	    !(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
		ice_set_link(vsi, false);
	}

	/* if the old link up/down and speed is the same as the new */
	if (link_up == old_link && link_speed == old_link_speed)
		return 0;

	if (!ice_is_e810(&pf->hw))
		ice_ptp_link_change(pf, pf->hw.pf_id, link_up);

	if (ice_is_dcb_active(pf)) {
		if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
			ice_dcb_rebuild(pf);
	} else {
		if (link_up)
			ice_set_dflt_mib(pf);
	}
	ice_vsi_link_event(vsi, link_up);
	ice_print_link_msg(vsi, link_up);

	ice_vc_notify_link_state(pf);

	return 0;
}

/**
 * ice_watchdog_subtask - periodic tasks not using event driven scheduling
 * @pf: board private structure
 */
static void ice_watchdog_subtask(struct ice_pf *pf)
{
	int i;

	/* if interface is down do nothing */
	if (test_bit(ICE_DOWN, pf->state) ||
	    test_bit(ICE_CFG_BUSY, pf->state))
		return;

	/* make sure we don't do these things too often */
	if (time_before(jiffies,
			pf->serv_tmr_prev + pf->serv_tmr_period))
		return;

	pf->serv_tmr_prev = jiffies;

	/* Update the stats for active netdevs so the network stack
	 * can look at updated numbers whenever it cares to
	 */
	ice_update_pf_stats(pf);
	ice_for_each_vsi(pf, i)
		if (pf->vsi[i] && pf->vsi[i]->netdev)
			ice_update_vsi_stats(pf->vsi[i]);
}

/**
 * ice_init_link_events - enable/initialize link events
 * @pi: pointer to the port_info instance
 *
 * Returns -EIO on failure, 0 on success
 */
static int ice_init_link_events(struct ice_port_info *pi)
{
	u16 mask;

	mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA |
		       ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL |
		       ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL));

	if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) {
		dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n",
			pi->lport);
		return -EIO;
	}

	if (ice_aq_get_link_info(pi, true, NULL, NULL)) {
		dev_dbg(ice_hw_to_dev(pi->hw), "Failed to enable link events for port %d\n",
			pi->lport);
		return -EIO;
	}

	return 0;
}

/**
 * ice_handle_link_event - handle link event via ARQ
 * @pf: PF that the link event is associated with
 * @event: event structure containing link status info
 */
static int
ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
{
	struct ice_aqc_get_link_status_data *link_data;
	struct ice_port_info *port_info;
	int status;

	link_data = (struct ice_aqc_get_link_status_data *)event->msg_buf;
	port_info = pf->hw.port_info;
	if (!port_info)
		return -EINVAL;

	status = ice_link_event(pf, port_info,
				!!(link_data->link_info & ICE_AQ_LINK_UP),
				le16_to_cpu(link_data->link_speed));
	if (status)
		dev_dbg(ice_pf_to_dev(pf), "Could not process link event, error %d\n",
			status);

	return status;
}
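
/* AdminQ event wait infrastructure: a caller registers an ice_aq_task for an
 * opcode and sleeps in ice_aq_wait_for_event(); the ARQ cleaner
 * (__ice_clean_ctrlq() -> ice_aq_check_events()) marks matching tasks
 * complete and wakes the waiters.
 */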

enum ice_aq_task_state {
	ICE_AQ_TASK_WAITING = 0,
	ICE_AQ_TASK_COMPLETE,
	ICE_AQ_TASK_CANCELED,
};

struct ice_aq_task {
	struct hlist_node entry;

	u16 opcode;
	struct ice_rq_event_info *event;
	enum ice_aq_task_state state;
};

/**
 * ice_aq_wait_for_event - Wait for an AdminQ event from firmware
 * @pf: pointer to the PF private structure
 * @opcode: the opcode to wait for
 * @timeout: how long to wait, in jiffies
 * @event: storage for the event info
 *
 * Waits for a specific AdminQ completion event on the ARQ for a given PF. The
 * current thread will be put to sleep until the specified event occurs or
 * until the given timeout is reached.
 *
 * To obtain only the descriptor contents, pass an event without an allocated
 * msg_buf. If the complete data buffer is desired, allocate the
 * event->msg_buf with enough space ahead of time.
 *
 * Returns: zero on success, or a negative error code on failure.
 */
int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
			  struct ice_rq_event_info *event)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_aq_task *task;
	unsigned long start;
	long ret;
	int err;

	task = kzalloc(sizeof(*task), GFP_KERNEL);
	if (!task)
		return -ENOMEM;

	INIT_HLIST_NODE(&task->entry);
	task->opcode = opcode;
	task->event = event;
	task->state = ICE_AQ_TASK_WAITING;

	spin_lock_bh(&pf->aq_wait_lock);
	hlist_add_head(&task->entry, &pf->aq_wait_list);
	spin_unlock_bh(&pf->aq_wait_lock);

	start = jiffies;

	ret = wait_event_interruptible_timeout(pf->aq_wait_queue, task->state,
					       timeout);
	switch (task->state) {
	case ICE_AQ_TASK_WAITING:
		err = ret < 0 ? ret : -ETIMEDOUT;
		break;
	case ICE_AQ_TASK_CANCELED:
		err = ret < 0 ? ret : -ECANCELED;
		break;
	case ICE_AQ_TASK_COMPLETE:
		err = ret < 0 ? ret : 0;
		break;
	default:
		WARN(1, "Unexpected AdminQ wait task state %u", task->state);
		err = -EINVAL;
		break;
	}

	dev_dbg(dev, "Waited %u msecs (max %u msecs) for firmware response to op 0x%04x\n",
		jiffies_to_msecs(jiffies - start),
		jiffies_to_msecs(timeout),
		opcode);

	spin_lock_bh(&pf->aq_wait_lock);
	hlist_del(&task->entry);
	spin_unlock_bh(&pf->aq_wait_lock);
	kfree(task);

	return err;
}
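
/* Illustrative only (not part of the driver): a caller that only needs the
 * completion descriptor for some opcode 'opc' could do
 *
 *	struct ice_rq_event_info event = { };
 *	int err;
 *
 *	err = ice_aq_wait_for_event(pf, opc, HZ, &event);
 *
 * To also capture the message payload, set event.buf_len and point
 * event.msg_buf at a buffer of that size before the call (see
 * ice_aq_check_events() below).
 */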

/**
 * ice_aq_check_events - Check if any thread is waiting for an AdminQ event
 * @pf: pointer to the PF private structure
 * @opcode: the opcode of the event
 * @event: the event to check
 *
 * Loops over the current list of pending threads waiting for an AdminQ event.
 * For each matching task, copy the contents of the event into the task
 * structure and wake up the thread.
 *
 * If multiple threads wait for the same opcode, they will all be woken up.
 *
 * Note that event->msg_buf will only be duplicated if the event has a buffer
 * with enough space already allocated. Otherwise, only the descriptor and
 * message length will be copied.
 */
static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
				struct ice_rq_event_info *event)
{
	struct ice_aq_task *task;
	bool found = false;

	spin_lock_bh(&pf->aq_wait_lock);
	hlist_for_each_entry(task, &pf->aq_wait_list, entry) {
		if (task->state || task->opcode != opcode)
			continue;

		memcpy(&task->event->desc, &event->desc, sizeof(event->desc));
		task->event->msg_len = event->msg_len;

		/* Only copy the data buffer if a destination was set */
		if (task->event->msg_buf &&
		    task->event->buf_len > event->buf_len) {
			memcpy(task->event->msg_buf, event->msg_buf,
			       event->buf_len);
			task->event->buf_len = event->buf_len;
		}

		task->state = ICE_AQ_TASK_COMPLETE;
		found = true;
	}
	spin_unlock_bh(&pf->aq_wait_lock);

	if (found)
		wake_up(&pf->aq_wait_queue);
}

/**
 * ice_aq_cancel_waiting_tasks - Immediately cancel all waiting tasks
 * @pf: the PF private structure
 *
 * Set all waiting tasks to ICE_AQ_TASK_CANCELED, and wake up their threads.
 * This will then cause ice_aq_wait_for_event to exit with -ECANCELED.
 */
static void ice_aq_cancel_waiting_tasks(struct ice_pf *pf)
{
	struct ice_aq_task *task;

	spin_lock_bh(&pf->aq_wait_lock);
	hlist_for_each_entry(task, &pf->aq_wait_list, entry)
		task->state = ICE_AQ_TASK_CANCELED;
	spin_unlock_bh(&pf->aq_wait_lock);

	wake_up(&pf->aq_wait_queue);
}

/**
 * __ice_clean_ctrlq - helper function to clean controlq rings
 * @pf: ptr to struct ice_pf
 * @q_type: specific Control queue type
 */
static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_rq_event_info event;
	struct ice_hw *hw = &pf->hw;
	struct ice_ctl_q_info *cq;
	u16 pending, i = 0;
	const char *qtype;
	u32 oldval, val;

	/* Do not clean control queue if/when PF reset fails */
	if (test_bit(ICE_RESET_FAILED, pf->state))
		return 0;

	switch (q_type) {
	case ICE_CTL_Q_ADMIN:
		cq = &hw->adminq;
		qtype = "Admin";
		break;
	case ICE_CTL_Q_SB:
		cq = &hw->sbq;
		qtype = "Sideband";
		break;
	case ICE_CTL_Q_MAILBOX:
		cq = &hw->mailboxq;
		qtype = "Mailbox";
		/* we are going to try to detect a malicious VF, so set the
		 * state to begin detection
		 */
		hw->mbx_snapshot.mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
		break;
	default:
		dev_warn(dev, "Unknown control queue type 0x%x\n", q_type);
		return 0;
	}

	/* check for error indications - PF_xx_AxQLEN register layout for
	 * FW/MBX/SB are identical so just use defines for PF_FW_AxQLEN.
	 */
	val = rd32(hw, cq->rq.len);
	if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
		   PF_FW_ARQLEN_ARQCRIT_M)) {
		oldval = val;
		if (val & PF_FW_ARQLEN_ARQVFE_M)
			dev_dbg(dev, "%s Receive Queue VF Error detected\n",
				qtype);
		if (val & PF_FW_ARQLEN_ARQOVFL_M) {
			dev_dbg(dev, "%s Receive Queue Overflow Error detected\n",
				qtype);
		}
		if (val & PF_FW_ARQLEN_ARQCRIT_M)
			dev_dbg(dev, "%s Receive Queue Critical Error detected\n",
				qtype);
		val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
			 PF_FW_ARQLEN_ARQCRIT_M);
		if (oldval != val)
			wr32(hw, cq->rq.len, val);
	}

	val = rd32(hw, cq->sq.len);
	if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
		   PF_FW_ATQLEN_ATQCRIT_M)) {
		oldval = val;
		if (val & PF_FW_ATQLEN_ATQVFE_M)
			dev_dbg(dev, "%s Send Queue VF Error detected\n",
				qtype);
		if (val & PF_FW_ATQLEN_ATQOVFL_M) {
			dev_dbg(dev, "%s Send Queue Overflow Error detected\n",
				qtype);
		}
		if (val & PF_FW_ATQLEN_ATQCRIT_M)
			dev_dbg(dev, "%s Send Queue Critical Error detected\n",
				qtype);
		val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
			 PF_FW_ATQLEN_ATQCRIT_M);
		if (oldval != val)
			wr32(hw, cq->sq.len, val);
	}

	event.buf_len = cq->rq_buf_size;
	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
	if (!event.msg_buf)
		return 0;

	do {
		u16 opcode;
		int ret;

		ret = ice_clean_rq_elem(hw, cq, &event, &pending);
		if (ret == -EALREADY)
			break;
		if (ret) {
			dev_err(dev, "%s Receive Queue event error %d\n", qtype,
				ret);
			break;
		}

		opcode = le16_to_cpu(event.desc.opcode);

		/* Notify any thread that might be waiting for this event */
		ice_aq_check_events(pf, opcode, &event);

		switch (opcode) {
		case ice_aqc_opc_get_link_status:
			if (ice_handle_link_event(pf, &event))
				dev_err(dev, "Could not handle link event\n");
			break;
		case ice_aqc_opc_event_lan_overflow:
			ice_vf_lan_overflow_event(pf, &event);
			break;
		case ice_mbx_opc_send_msg_to_pf:
			if (!ice_is_malicious_vf(pf, &event, i, pending))
				ice_vc_process_vf_msg(pf, &event);
			break;
		case ice_aqc_opc_fw_logging:
			ice_output_fw_log(hw, &event.desc, event.msg_buf);
			break;
		case ice_aqc_opc_lldp_set_mib_change:
			ice_dcb_process_lldp_set_mib_change(pf, &event);
			break;
		default:
			dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n",
				qtype, opcode);
			break;
		}
	} while (pending && (i++ < ICE_DFLT_IRQ_WORK));

	kfree(event.msg_buf);

	return pending && (i == ICE_DFLT_IRQ_WORK);
}

/**
 * ice_ctrlq_pending - check if there is a difference between ntc and ntu
 * @hw: pointer to hardware info
 * @cq: control queue information
 *
 * returns true if there are pending messages in a queue, false if there aren't
 */
static bool ice_ctrlq_pending(struct ice_hw *hw, struct ice_ctl_q_info *cq)
{
	u16 ntu;

	ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
	return cq->rq.next_to_clean != ntu;
}

/**
 * ice_clean_adminq_subtask - clean the AdminQ rings
 * @pf: board private structure
 */
static void ice_clean_adminq_subtask(struct ice_pf *pf)
{
	struct ice_hw *hw = &pf->hw;

	if (!test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
		return;

	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN))
		return;

	clear_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);

	/* There might be a situation where new messages arrive to a control
	 * queue between processing the last message and clearing the
	 * EVENT_PENDING bit. So before exiting, check queue head again (using
	 * ice_ctrlq_pending) and process new messages if any.
	 */
	if (ice_ctrlq_pending(hw, &hw->adminq))
		__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN);

	ice_flush(hw);
}

/**
 * ice_clean_mailboxq_subtask - clean the MailboxQ rings
 * @pf: board private structure
 */
static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
{
	struct ice_hw *hw = &pf->hw;

	if (!test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state))
		return;

	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX))
		return;

	clear_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);

	if (ice_ctrlq_pending(hw, &hw->mailboxq))
		__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX);

	ice_flush(hw);
}

/**
 * ice_clean_sbq_subtask - clean the Sideband Queue rings
 * @pf: board private structure
 */
static void ice_clean_sbq_subtask(struct ice_pf *pf)
{
	struct ice_hw *hw = &pf->hw;

	/* Nothing to do here if sideband queue is not supported */
	if (!ice_is_sbq_supported(hw)) {
		clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
		return;
	}

	if (!test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state))
		return;

	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_SB))
		return;

	clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);

	if (ice_ctrlq_pending(hw, &hw->sbq))
		__ice_clean_ctrlq(pf, ICE_CTL_Q_SB);

	ice_flush(hw);
}

/**
 * ice_service_task_schedule - schedule the service task to wake up
 * @pf: board private structure
 *
 * If not already scheduled, this puts the task into the work queue.
 */
void ice_service_task_schedule(struct ice_pf *pf)
{
	if (!test_bit(ICE_SERVICE_DIS, pf->state) &&
	    !test_and_set_bit(ICE_SERVICE_SCHED, pf->state) &&
	    !test_bit(ICE_NEEDS_RESTART, pf->state))
		queue_work(ice_wq, &pf->serv_task);
}

/**
 * ice_service_task_complete - finish up the service task
 * @pf: board private structure
 */
static void ice_service_task_complete(struct ice_pf *pf)
{
	WARN_ON(!test_bit(ICE_SERVICE_SCHED, pf->state));

	/* force memory (pf->state) to sync before next service task */
	smp_mb__before_atomic();
	clear_bit(ICE_SERVICE_SCHED, pf->state);
}
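
/* ICE_SERVICE_SCHED acts as a "work already queued" latch:
 * ice_service_task_schedule() only queues serv_task when it wins the
 * test_and_set_bit(), and ice_service_task_complete() clears the bit so the
 * next schedule request can queue the work again.
 */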

/**
 * ice_service_task_stop - stop service task and cancel works
 * @pf: board private structure
 *
 * Return 0 if the ICE_SERVICE_DIS bit was not already set,
 * 1 otherwise.
 */
static int ice_service_task_stop(struct ice_pf *pf)
{
	int ret;

	ret = test_and_set_bit(ICE_SERVICE_DIS, pf->state);

	if (pf->serv_tmr.function)
		del_timer_sync(&pf->serv_tmr);
	if (pf->serv_task.func)
		cancel_work_sync(&pf->serv_task);

	clear_bit(ICE_SERVICE_SCHED, pf->state);
	return ret;
}

/**
 * ice_service_task_restart - restart service task and schedule works
 * @pf: board private structure
 *
 * This function is needed for suspend and resume flows (e.g. the WoL scenario)
 */
static void ice_service_task_restart(struct ice_pf *pf)
{
	clear_bit(ICE_SERVICE_DIS, pf->state);
	ice_service_task_schedule(pf);
}

/**
 * ice_service_timer - timer callback to schedule service task
 * @t: pointer to timer_list
 */
static void ice_service_timer(struct timer_list *t)
{
	struct ice_pf *pf = from_timer(pf, t, serv_tmr);

	mod_timer(&pf->serv_tmr, round_jiffies(pf->serv_tmr_period + jiffies));
	ice_service_task_schedule(pf);
}

/**
 * ice_handle_mdd_event - handle malicious driver detect event
 * @pf: pointer to the PF structure
 *
 * Called from service task. OICR interrupt handler indicates MDD event.
 * VF MDD logging is guarded by net_ratelimit. Additional PF and VF log
 * messages are wrapped by netif_msg_[rx|tx]_err. Since VF Rx MDD events
 * disable the queue, the PF can be configured to reset the VF using ethtool
 * private flag mdd-auto-reset-vf.
 */
static void ice_handle_mdd_event(struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_hw *hw = &pf->hw;
	struct ice_vf *vf;
	unsigned int bkt;
	u32 reg;

	if (!test_and_clear_bit(ICE_MDD_EVENT_PENDING, pf->state)) {
		/* Since the VF MDD event logging is rate limited, check if
		 * there are pending MDD events.
		 */
		ice_print_vfs_mdd_events(pf);
		return;
	}

	/* find what triggered an MDD event */
	reg = rd32(hw, GL_MDET_TX_PQM);
	if (reg & GL_MDET_TX_PQM_VALID_M) {
		u8 pf_num = (reg & GL_MDET_TX_PQM_PF_NUM_M) >>
				GL_MDET_TX_PQM_PF_NUM_S;
		u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >>
				GL_MDET_TX_PQM_VF_NUM_S;
		u8 event = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >>
				GL_MDET_TX_PQM_MAL_TYPE_S;
		u16 queue = ((reg & GL_MDET_TX_PQM_QNUM_M) >>
				GL_MDET_TX_PQM_QNUM_S);

		if (netif_msg_tx_err(pf))
			dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
				 event, queue, pf_num, vf_num);
		wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
	}

	reg = rd32(hw, GL_MDET_TX_TCLAN);
	if (reg & GL_MDET_TX_TCLAN_VALID_M) {
		u8 pf_num = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >>
				GL_MDET_TX_TCLAN_PF_NUM_S;
		u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >>
				GL_MDET_TX_TCLAN_VF_NUM_S;
		u8 event = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >>
				GL_MDET_TX_TCLAN_MAL_TYPE_S;
		u16 queue = ((reg & GL_MDET_TX_TCLAN_QNUM_M) >>
				GL_MDET_TX_TCLAN_QNUM_S);

		if (netif_msg_tx_err(pf))
			dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
				 event, queue, pf_num, vf_num);
		wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
	}

	reg = rd32(hw, GL_MDET_RX);
	if (reg & GL_MDET_RX_VALID_M) {
		u8 pf_num = (reg & GL_MDET_RX_PF_NUM_M) >>
				GL_MDET_RX_PF_NUM_S;
		u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >>
				GL_MDET_RX_VF_NUM_S;
		u8 event = (reg & GL_MDET_RX_MAL_TYPE_M) >>
				GL_MDET_RX_MAL_TYPE_S;
		u16 queue = ((reg & GL_MDET_RX_QNUM_M) >>
				GL_MDET_RX_QNUM_S);

		if (netif_msg_rx_err(pf))
			dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
				 event, queue, pf_num, vf_num);
		wr32(hw, GL_MDET_RX, 0xffffffff);
	}

	/* check to see if this PF caused an MDD event */
	reg = rd32(hw, PF_MDET_TX_PQM);
	if (reg & PF_MDET_TX_PQM_VALID_M) {
		wr32(hw, PF_MDET_TX_PQM, 0xFFFF);
		if (netif_msg_tx_err(pf))
			dev_info(dev, "Malicious Driver Detection event TX_PQM detected on PF\n");
	}

	reg = rd32(hw, PF_MDET_TX_TCLAN);
	if (reg & PF_MDET_TX_TCLAN_VALID_M) {
		wr32(hw, PF_MDET_TX_TCLAN, 0xFFFF);
		if (netif_msg_tx_err(pf))
			dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on PF\n");
	}

	reg = rd32(hw, PF_MDET_RX);
	if (reg & PF_MDET_RX_VALID_M) {
		wr32(hw, PF_MDET_RX, 0xFFFF);
		if (netif_msg_rx_err(pf))
			dev_info(dev, "Malicious Driver Detection event RX detected on PF\n");
	}

	/* Check to see if one of the VFs caused an MDD event, and then
	 * increment counters and set print pending
	 */
	mutex_lock(&pf->vfs.table_lock);
	ice_for_each_vf(pf, bkt, vf) {
		reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id));
		if (reg & VP_MDET_TX_PQM_VALID_M) {
			wr32(hw, VP_MDET_TX_PQM(vf->vf_id), 0xFFFF);
			vf->mdd_tx_events.count++;
			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
			if (netif_msg_tx_err(pf))
				dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n",
					 vf->vf_id);
		}

		reg = rd32(hw, VP_MDET_TX_TCLAN(vf->vf_id));
		if (reg & VP_MDET_TX_TCLAN_VALID_M) {
			wr32(hw, VP_MDET_TX_TCLAN(vf->vf_id), 0xFFFF);
			vf->mdd_tx_events.count++;
			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
			if (netif_msg_tx_err(pf))
				dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n",
					 vf->vf_id);
		}

		reg = rd32(hw, VP_MDET_TX_TDPU(vf->vf_id));
		if (reg & VP_MDET_TX_TDPU_VALID_M) {
			wr32(hw, VP_MDET_TX_TDPU(vf->vf_id), 0xFFFF);
			vf->mdd_tx_events.count++;
			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
			if (netif_msg_tx_err(pf))
				dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n",
					 vf->vf_id);
		}

		reg = rd32(hw, VP_MDET_RX(vf->vf_id));
		if (reg & VP_MDET_RX_VALID_M) {
			wr32(hw, VP_MDET_RX(vf->vf_id), 0xFFFF);
			vf->mdd_rx_events.count++;
			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
			if (netif_msg_rx_err(pf))
				dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n",
					 vf->vf_id);

			/* Since the queue is disabled on VF Rx MDD events, the
			 * PF can be configured to reset the VF through ethtool
			 * private flag mdd-auto-reset-vf.
			 */
			if (test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
				/* VF MDD event counters will be cleared by
				 * reset, so print the event prior to reset.
				 */
				ice_print_vf_rx_mdd_event(vf);
				ice_reset_vf(vf, ICE_VF_RESET_LOCK);
			}
		}
	}
	mutex_unlock(&pf->vfs.table_lock);

	ice_print_vfs_mdd_events(pf);
}

/**
 * ice_force_phys_link_state - Force the physical link state
 * @vsi: VSI to force the physical link state to up/down
 * @link_up: true/false indicates to set the physical link to up/down
 *
 * Force the physical link state by getting the current PHY capabilities from
 * hardware and setting the PHY config based on the determined capabilities. If
 * link changes a link event will be triggered because both the Enable Automatic
 * Link Update and LESM Enable bits are set when setting the PHY capabilities.
 *
 * Returns 0 on success, negative on failure
 */
static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
{
	struct ice_aqc_get_phy_caps_data *pcaps;
	struct ice_aqc_set_phy_cfg_data *cfg;
	struct ice_port_info *pi;
	struct device *dev;
	int retcode;

	if (!vsi || !vsi->port_info || !vsi->back)
		return -EINVAL;
	if (vsi->type != ICE_VSI_PF)
		return 0;

	dev = ice_pf_to_dev(vsi->back);

	pi = vsi->port_info;

	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
	if (!pcaps)
		return -ENOMEM;

	retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
				      NULL);
	if (retcode) {
		dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n",
			vsi->vsi_num, retcode);
		retcode = -EIO;
		goto out;
	}

	/* No change in link */
	if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) &&
	    link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP))
		goto out;

	/* Use the current user PHY configuration. The current user PHY
	 * configuration is initialized during probe from PHY capabilities
	 * software mode, and updated on set PHY configuration.
1885 */ 1886 cfg = kmemdup(&pi->phy.curr_user_phy_cfg, sizeof(*cfg), GFP_KERNEL); 1887 if (!cfg) { 1888 retcode = -ENOMEM; 1889 goto out; 1890 } 1891 1892 cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT; 1893 if (link_up) 1894 cfg->caps |= ICE_AQ_PHY_ENA_LINK; 1895 else 1896 cfg->caps &= ~ICE_AQ_PHY_ENA_LINK; 1897 1898 retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL); 1899 if (retcode) { 1900 dev_err(dev, "Failed to set phy config, VSI %d error %d\n", 1901 vsi->vsi_num, retcode); 1902 retcode = -EIO; 1903 } 1904 1905 kfree(cfg); 1906 out: 1907 kfree(pcaps); 1908 return retcode; 1909 } 1910 1911 /** 1912 * ice_init_nvm_phy_type - Initialize the NVM PHY type 1913 * @pi: port info structure 1914 * 1915 * Initialize nvm_phy_type_[low|high] for link lenient mode support 1916 */ 1917 static int ice_init_nvm_phy_type(struct ice_port_info *pi) 1918 { 1919 struct ice_aqc_get_phy_caps_data *pcaps; 1920 struct ice_pf *pf = pi->hw->back; 1921 int err; 1922 1923 pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); 1924 if (!pcaps) 1925 return -ENOMEM; 1926 1927 err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA, 1928 pcaps, NULL); 1929 1930 if (err) { 1931 dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n"); 1932 goto out; 1933 } 1934 1935 pf->nvm_phy_type_hi = pcaps->phy_type_high; 1936 pf->nvm_phy_type_lo = pcaps->phy_type_low; 1937 1938 out: 1939 kfree(pcaps); 1940 return err; 1941 } 1942 1943 /** 1944 * ice_init_link_dflt_override - Initialize link default override 1945 * @pi: port info structure 1946 * 1947 * Initialize link default override and PHY total port shutdown during probe 1948 */ 1949 static void ice_init_link_dflt_override(struct ice_port_info *pi) 1950 { 1951 struct ice_link_default_override_tlv *ldo; 1952 struct ice_pf *pf = pi->hw->back; 1953 1954 ldo = &pf->link_dflt_override; 1955 if (ice_get_link_default_override(ldo, pi)) 1956 return; 1957 1958 if (!(ldo->options & ICE_LINK_OVERRIDE_PORT_DIS)) 1959 return; 1960 1961 /* Enable Total Port Shutdown (override/replace link-down-on-close 1962 * ethtool private flag) for ports with Port Disable bit set. 1963 */ 1964 set_bit(ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags); 1965 set_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags); 1966 } 1967 1968 /** 1969 * ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings 1970 * @pi: port info structure 1971 * 1972 * If default override is enabled, initialize the user PHY cfg speed and FEC 1973 * settings using the default override mask from the NVM. 1974 * 1975 * The PHY should only be configured with the default override settings the 1976 * first time media is available. The ICE_LINK_DEFAULT_OVERRIDE_PENDING state 1977 * is used to indicate that the user PHY cfg default override is initialized 1978 * and the PHY has not been configured with the default override settings. The 1979 * state is set here, and cleared in ice_configure_phy the first time the PHY is 1980 * configured. 1981 * 1982 * This function should be called only if the FW doesn't support default 1983 * configuration mode, as reported by ice_fw_supports_report_dflt_cfg. 
1984 */ 1985 static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi) 1986 { 1987 struct ice_link_default_override_tlv *ldo; 1988 struct ice_aqc_set_phy_cfg_data *cfg; 1989 struct ice_phy_info *phy = &pi->phy; 1990 struct ice_pf *pf = pi->hw->back; 1991 1992 ldo = &pf->link_dflt_override; 1993 1994 /* If link default override is enabled, use to mask NVM PHY capabilities 1995 * for speed and FEC default configuration. 1996 */ 1997 cfg = &phy->curr_user_phy_cfg; 1998 1999 if (ldo->phy_type_low || ldo->phy_type_high) { 2000 cfg->phy_type_low = pf->nvm_phy_type_lo & 2001 cpu_to_le64(ldo->phy_type_low); 2002 cfg->phy_type_high = pf->nvm_phy_type_hi & 2003 cpu_to_le64(ldo->phy_type_high); 2004 } 2005 cfg->link_fec_opt = ldo->fec_options; 2006 phy->curr_user_fec_req = ICE_FEC_AUTO; 2007 2008 set_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state); 2009 } 2010 2011 /** 2012 * ice_init_phy_user_cfg - Initialize the PHY user configuration 2013 * @pi: port info structure 2014 * 2015 * Initialize the current user PHY configuration, speed, FEC, and FC requested 2016 * mode to default. The PHY defaults are from get PHY capabilities topology 2017 * with media so call when media is first available. An error is returned if 2018 * called when media is not available. The PHY initialization completed state is 2019 * set here. 2020 * 2021 * These configurations are used when setting PHY 2022 * configuration. The user PHY configuration is updated on set PHY 2023 * configuration. Returns 0 on success, negative on failure 2024 */ 2025 static int ice_init_phy_user_cfg(struct ice_port_info *pi) 2026 { 2027 struct ice_aqc_get_phy_caps_data *pcaps; 2028 struct ice_phy_info *phy = &pi->phy; 2029 struct ice_pf *pf = pi->hw->back; 2030 int err; 2031 2032 if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) 2033 return -EIO; 2034 2035 pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); 2036 if (!pcaps) 2037 return -ENOMEM; 2038 2039 if (ice_fw_supports_report_dflt_cfg(pi->hw)) 2040 err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG, 2041 pcaps, NULL); 2042 else 2043 err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, 2044 pcaps, NULL); 2045 if (err) { 2046 dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n"); 2047 goto err_out; 2048 } 2049 2050 ice_copy_phy_caps_to_cfg(pi, pcaps, &pi->phy.curr_user_phy_cfg); 2051 2052 /* check if lenient mode is supported and enabled */ 2053 if (ice_fw_supports_link_override(pi->hw) && 2054 !(pcaps->module_compliance_enforcement & 2055 ICE_AQC_MOD_ENFORCE_STRICT_MODE)) { 2056 set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags); 2057 2058 /* if the FW supports default PHY configuration mode, then the driver 2059 * does not have to apply link override settings. 
If not, 2060 * initialize user PHY configuration with link override values 2061 */ 2062 if (!ice_fw_supports_report_dflt_cfg(pi->hw) && 2063 (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN)) { 2064 ice_init_phy_cfg_dflt_override(pi); 2065 goto out; 2066 } 2067 } 2068 2069 /* if link default override is not enabled, set user flow control and 2070 * FEC settings based on what get_phy_caps returned 2071 */ 2072 phy->curr_user_fec_req = ice_caps_to_fec_mode(pcaps->caps, 2073 pcaps->link_fec_options); 2074 phy->curr_user_fc_req = ice_caps_to_fc_mode(pcaps->caps); 2075 2076 out: 2077 phy->curr_user_speed_req = ICE_AQ_LINK_SPEED_M; 2078 set_bit(ICE_PHY_INIT_COMPLETE, pf->state); 2079 err_out: 2080 kfree(pcaps); 2081 return err; 2082 } 2083 2084 /** 2085 * ice_configure_phy - configure PHY 2086 * @vsi: VSI of PHY 2087 * 2088 * Set the PHY configuration. If the current PHY configuration is the same as 2089 * the curr_user_phy_cfg, then do nothing to avoid link flap. Otherwise 2090 * configure the based get PHY capabilities for topology with media. 2091 */ 2092 static int ice_configure_phy(struct ice_vsi *vsi) 2093 { 2094 struct device *dev = ice_pf_to_dev(vsi->back); 2095 struct ice_port_info *pi = vsi->port_info; 2096 struct ice_aqc_get_phy_caps_data *pcaps; 2097 struct ice_aqc_set_phy_cfg_data *cfg; 2098 struct ice_phy_info *phy = &pi->phy; 2099 struct ice_pf *pf = vsi->back; 2100 int err; 2101 2102 /* Ensure we have media as we cannot configure a medialess port */ 2103 if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) 2104 return -EPERM; 2105 2106 ice_print_topo_conflict(vsi); 2107 2108 if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags) && 2109 phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA) 2110 return -EPERM; 2111 2112 if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) 2113 return ice_force_phys_link_state(vsi, true); 2114 2115 pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); 2116 if (!pcaps) 2117 return -ENOMEM; 2118 2119 /* Get current PHY config */ 2120 err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, 2121 NULL); 2122 if (err) { 2123 dev_err(dev, "Failed to get PHY configuration, VSI %d error %d\n", 2124 vsi->vsi_num, err); 2125 goto done; 2126 } 2127 2128 /* If PHY enable link is configured and configuration has not changed, 2129 * there's nothing to do 2130 */ 2131 if (pcaps->caps & ICE_AQC_PHY_EN_LINK && 2132 ice_phy_caps_equals_cfg(pcaps, &phy->curr_user_phy_cfg)) 2133 goto done; 2134 2135 /* Use PHY topology as baseline for configuration */ 2136 memset(pcaps, 0, sizeof(*pcaps)); 2137 if (ice_fw_supports_report_dflt_cfg(pi->hw)) 2138 err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG, 2139 pcaps, NULL); 2140 else 2141 err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, 2142 pcaps, NULL); 2143 if (err) { 2144 dev_err(dev, "Failed to get PHY caps, VSI %d error %d\n", 2145 vsi->vsi_num, err); 2146 goto done; 2147 } 2148 2149 cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); 2150 if (!cfg) { 2151 err = -ENOMEM; 2152 goto done; 2153 } 2154 2155 ice_copy_phy_caps_to_cfg(pi, pcaps, cfg); 2156 2157 /* Speed - If default override pending, use curr_user_phy_cfg set in 2158 * ice_init_phy_user_cfg_ldo. 
2159 */ 2160 if (test_and_clear_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING, 2161 vsi->back->state)) { 2162 cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low; 2163 cfg->phy_type_high = phy->curr_user_phy_cfg.phy_type_high; 2164 } else { 2165 u64 phy_low = 0, phy_high = 0; 2166 2167 ice_update_phy_type(&phy_low, &phy_high, 2168 pi->phy.curr_user_speed_req); 2169 cfg->phy_type_low = pcaps->phy_type_low & cpu_to_le64(phy_low); 2170 cfg->phy_type_high = pcaps->phy_type_high & 2171 cpu_to_le64(phy_high); 2172 } 2173 2174 /* Can't provide what was requested; use PHY capabilities */ 2175 if (!cfg->phy_type_low && !cfg->phy_type_high) { 2176 cfg->phy_type_low = pcaps->phy_type_low; 2177 cfg->phy_type_high = pcaps->phy_type_high; 2178 } 2179 2180 /* FEC */ 2181 ice_cfg_phy_fec(pi, cfg, phy->curr_user_fec_req); 2182 2183 /* Can't provide what was requested; use PHY capabilities */ 2184 if (cfg->link_fec_opt != 2185 (cfg->link_fec_opt & pcaps->link_fec_options)) { 2186 cfg->caps |= pcaps->caps & ICE_AQC_PHY_EN_AUTO_FEC; 2187 cfg->link_fec_opt = pcaps->link_fec_options; 2188 } 2189 2190 /* Flow Control - always supported; no need to check against 2191 * capabilities 2192 */ 2193 ice_cfg_phy_fc(pi, cfg, phy->curr_user_fc_req); 2194 2195 /* Enable link and link update */ 2196 cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK; 2197 2198 err = ice_aq_set_phy_cfg(&pf->hw, pi, cfg, NULL); 2199 if (err) 2200 dev_err(dev, "Failed to set phy config, VSI %d error %d\n", 2201 vsi->vsi_num, err); 2202 2203 kfree(cfg); 2204 done: 2205 kfree(pcaps); 2206 return err; 2207 } 2208 2209 /** 2210 * ice_check_media_subtask - Check for media 2211 * @pf: pointer to PF struct 2212 * 2213 * If media is available, then initialize PHY user configuration if it is not 2214 * been, and configure the PHY if the interface is up. 2215 */ 2216 static void ice_check_media_subtask(struct ice_pf *pf) 2217 { 2218 struct ice_port_info *pi; 2219 struct ice_vsi *vsi; 2220 int err; 2221 2222 /* No need to check for media if it's already present */ 2223 if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags)) 2224 return; 2225 2226 vsi = ice_get_main_vsi(pf); 2227 if (!vsi) 2228 return; 2229 2230 /* Refresh link info and check if media is present */ 2231 pi = vsi->port_info; 2232 err = ice_update_link_info(pi); 2233 if (err) 2234 return; 2235 2236 ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err); 2237 2238 if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { 2239 if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) 2240 ice_init_phy_user_cfg(pi); 2241 2242 /* PHY settings are reset on media insertion, reconfigure 2243 * PHY to preserve settings. 
2244 */ 2245 if (test_bit(ICE_VSI_DOWN, vsi->state) && 2246 test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) 2247 return; 2248 2249 err = ice_configure_phy(vsi); 2250 if (!err) 2251 clear_bit(ICE_FLAG_NO_MEDIA, pf->flags); 2252 2253 /* A Link Status Event will be generated; the event handler 2254 * will complete bringing the interface up 2255 */ 2256 } 2257 } 2258 2259 /** 2260 * ice_service_task - manage and run subtasks 2261 * @work: pointer to work_struct contained by the PF struct 2262 */ 2263 static void ice_service_task(struct work_struct *work) 2264 { 2265 struct ice_pf *pf = container_of(work, struct ice_pf, serv_task); 2266 unsigned long start_time = jiffies; 2267 2268 /* subtasks */ 2269 2270 /* process reset requests first */ 2271 ice_reset_subtask(pf); 2272 2273 /* bail if a reset/recovery cycle is pending or rebuild failed */ 2274 if (ice_is_reset_in_progress(pf->state) || 2275 test_bit(ICE_SUSPENDED, pf->state) || 2276 test_bit(ICE_NEEDS_RESTART, pf->state)) { 2277 ice_service_task_complete(pf); 2278 return; 2279 } 2280 2281 if (test_and_clear_bit(ICE_AUX_ERR_PENDING, pf->state)) { 2282 struct iidc_event *event; 2283 2284 event = kzalloc(sizeof(*event), GFP_KERNEL); 2285 if (event) { 2286 set_bit(IIDC_EVENT_CRIT_ERR, event->type); 2287 /* report the entire OICR value to AUX driver */ 2288 swap(event->reg, pf->oicr_err_reg); 2289 ice_send_event_to_aux(pf, event); 2290 kfree(event); 2291 } 2292 } 2293 2294 if (test_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) { 2295 /* Plug aux device per request */ 2296 ice_plug_aux_dev(pf); 2297 2298 /* Mark plugging as done but check whether unplug was 2299 * requested during ice_plug_aux_dev() call 2300 * (e.g. from ice_clear_rdma_cap()) and if so then 2301 * unplug aux device. 2302 */ 2303 if (!test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) 2304 ice_unplug_aux_dev(pf); 2305 } 2306 2307 if (test_and_clear_bit(ICE_FLAG_MTU_CHANGED, pf->flags)) { 2308 struct iidc_event *event; 2309 2310 event = kzalloc(sizeof(*event), GFP_KERNEL); 2311 if (event) { 2312 set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type); 2313 ice_send_event_to_aux(pf, event); 2314 kfree(event); 2315 } 2316 } 2317 2318 ice_clean_adminq_subtask(pf); 2319 ice_check_media_subtask(pf); 2320 ice_check_for_hang_subtask(pf); 2321 ice_sync_fltr_subtask(pf); 2322 ice_handle_mdd_event(pf); 2323 ice_watchdog_subtask(pf); 2324 2325 if (ice_is_safe_mode(pf)) { 2326 ice_service_task_complete(pf); 2327 return; 2328 } 2329 2330 ice_process_vflr_event(pf); 2331 ice_clean_mailboxq_subtask(pf); 2332 ice_clean_sbq_subtask(pf); 2333 ice_sync_arfs_fltrs(pf); 2334 ice_flush_fdir_ctx(pf); 2335 2336 /* Clear ICE_SERVICE_SCHED flag to allow scheduling next event */ 2337 ice_service_task_complete(pf); 2338 2339 /* If the tasks have taken longer than one service timer period 2340 * or there is more work to be done, reset the service timer to 2341 * schedule the service task now.
2342 */ 2343 if (time_after(jiffies, (start_time + pf->serv_tmr_period)) || 2344 test_bit(ICE_MDD_EVENT_PENDING, pf->state) || 2345 test_bit(ICE_VFLR_EVENT_PENDING, pf->state) || 2346 test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) || 2347 test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) || 2348 test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state) || 2349 test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state)) 2350 mod_timer(&pf->serv_tmr, jiffies); 2351 } 2352 2353 /** 2354 * ice_set_ctrlq_len - helper function to set controlq length 2355 * @hw: pointer to the HW instance 2356 */ 2357 static void ice_set_ctrlq_len(struct ice_hw *hw) 2358 { 2359 hw->adminq.num_rq_entries = ICE_AQ_LEN; 2360 hw->adminq.num_sq_entries = ICE_AQ_LEN; 2361 hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN; 2362 hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN; 2363 hw->mailboxq.num_rq_entries = PF_MBX_ARQLEN_ARQLEN_M; 2364 hw->mailboxq.num_sq_entries = ICE_MBXSQ_LEN; 2365 hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN; 2366 hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN; 2367 hw->sbq.num_rq_entries = ICE_SBQ_LEN; 2368 hw->sbq.num_sq_entries = ICE_SBQ_LEN; 2369 hw->sbq.rq_buf_size = ICE_SBQ_MAX_BUF_LEN; 2370 hw->sbq.sq_buf_size = ICE_SBQ_MAX_BUF_LEN; 2371 } 2372 2373 /** 2374 * ice_schedule_reset - schedule a reset 2375 * @pf: board private structure 2376 * @reset: reset being requested 2377 */ 2378 int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset) 2379 { 2380 struct device *dev = ice_pf_to_dev(pf); 2381 2382 /* bail out if earlier reset has failed */ 2383 if (test_bit(ICE_RESET_FAILED, pf->state)) { 2384 dev_dbg(dev, "earlier reset has failed\n"); 2385 return -EIO; 2386 } 2387 /* bail if reset/recovery already in progress */ 2388 if (ice_is_reset_in_progress(pf->state)) { 2389 dev_dbg(dev, "Reset already in progress\n"); 2390 return -EBUSY; 2391 } 2392 2393 ice_unplug_aux_dev(pf); 2394 2395 switch (reset) { 2396 case ICE_RESET_PFR: 2397 set_bit(ICE_PFR_REQ, pf->state); 2398 break; 2399 case ICE_RESET_CORER: 2400 set_bit(ICE_CORER_REQ, pf->state); 2401 break; 2402 case ICE_RESET_GLOBR: 2403 set_bit(ICE_GLOBR_REQ, pf->state); 2404 break; 2405 default: 2406 return -EINVAL; 2407 } 2408 2409 ice_service_task_schedule(pf); 2410 return 0; 2411 } 2412 2413 /** 2414 * ice_irq_affinity_notify - Callback for affinity changes 2415 * @notify: context as to what irq was changed 2416 * @mask: the new affinity mask 2417 * 2418 * This is a callback function used by the irq_set_affinity_notifier function 2419 * so that we may register to receive changes to the irq affinity masks. 2420 */ 2421 static void 2422 ice_irq_affinity_notify(struct irq_affinity_notify *notify, 2423 const cpumask_t *mask) 2424 { 2425 struct ice_q_vector *q_vector = 2426 container_of(notify, struct ice_q_vector, affinity_notify); 2427 2428 cpumask_copy(&q_vector->affinity_mask, mask); 2429 } 2430 2431 /** 2432 * ice_irq_affinity_release - Callback for affinity notifier release 2433 * @ref: internal core kernel usage 2434 * 2435 * This is a callback function used by the irq_set_affinity_notifier function 2436 * to inform the current notification subscriber that they will no longer 2437 * receive notifications. 
2438 */ 2439 static void ice_irq_affinity_release(struct kref __always_unused *ref) {} 2440 2441 /** 2442 * ice_vsi_ena_irq - Enable IRQ for the given VSI 2443 * @vsi: the VSI being configured 2444 */ 2445 static int ice_vsi_ena_irq(struct ice_vsi *vsi) 2446 { 2447 struct ice_hw *hw = &vsi->back->hw; 2448 int i; 2449 2450 ice_for_each_q_vector(vsi, i) 2451 ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]); 2452 2453 ice_flush(hw); 2454 return 0; 2455 } 2456 2457 /** 2458 * ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI 2459 * @vsi: the VSI being configured 2460 * @basename: name for the vector 2461 */ 2462 static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) 2463 { 2464 int q_vectors = vsi->num_q_vectors; 2465 struct ice_pf *pf = vsi->back; 2466 int base = vsi->base_vector; 2467 struct device *dev; 2468 int rx_int_idx = 0; 2469 int tx_int_idx = 0; 2470 int vector, err; 2471 int irq_num; 2472 2473 dev = ice_pf_to_dev(pf); 2474 for (vector = 0; vector < q_vectors; vector++) { 2475 struct ice_q_vector *q_vector = vsi->q_vectors[vector]; 2476 2477 irq_num = pf->msix_entries[base + vector].vector; 2478 2479 if (q_vector->tx.tx_ring && q_vector->rx.rx_ring) { 2480 snprintf(q_vector->name, sizeof(q_vector->name) - 1, 2481 "%s-%s-%d", basename, "TxRx", rx_int_idx++); 2482 tx_int_idx++; 2483 } else if (q_vector->rx.rx_ring) { 2484 snprintf(q_vector->name, sizeof(q_vector->name) - 1, 2485 "%s-%s-%d", basename, "rx", rx_int_idx++); 2486 } else if (q_vector->tx.tx_ring) { 2487 snprintf(q_vector->name, sizeof(q_vector->name) - 1, 2488 "%s-%s-%d", basename, "tx", tx_int_idx++); 2489 } else { 2490 /* skip this unused q_vector */ 2491 continue; 2492 } 2493 if (vsi->type == ICE_VSI_CTRL && vsi->vf) 2494 err = devm_request_irq(dev, irq_num, vsi->irq_handler, 2495 IRQF_SHARED, q_vector->name, 2496 q_vector); 2497 else 2498 err = devm_request_irq(dev, irq_num, vsi->irq_handler, 2499 0, q_vector->name, q_vector); 2500 if (err) { 2501 netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n", 2502 err); 2503 goto free_q_irqs; 2504 } 2505 2506 /* register for affinity change notifications */ 2507 if (!IS_ENABLED(CONFIG_RFS_ACCEL)) { 2508 struct irq_affinity_notify *affinity_notify; 2509 2510 affinity_notify = &q_vector->affinity_notify; 2511 affinity_notify->notify = ice_irq_affinity_notify; 2512 affinity_notify->release = ice_irq_affinity_release; 2513 irq_set_affinity_notifier(irq_num, affinity_notify); 2514 } 2515 2516 /* assign the mask for this irq */ 2517 irq_set_affinity_hint(irq_num, &q_vector->affinity_mask); 2518 } 2519 2520 vsi->irqs_ready = true; 2521 return 0; 2522 2523 free_q_irqs: 2524 while (vector) { 2525 vector--; 2526 irq_num = pf->msix_entries[base + vector].vector; 2527 if (!IS_ENABLED(CONFIG_RFS_ACCEL)) 2528 irq_set_affinity_notifier(irq_num, NULL); 2529 irq_set_affinity_hint(irq_num, NULL); 2530 devm_free_irq(dev, irq_num, &vsi->q_vectors[vector]); 2531 } 2532 return err; 2533 } 2534 2535 /** 2536 * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP 2537 * @vsi: VSI to setup Tx rings used by XDP 2538 * 2539 * Return 0 on success and negative value on error 2540 */ 2541 static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) 2542 { 2543 struct device *dev = ice_pf_to_dev(vsi->back); 2544 struct ice_tx_desc *tx_desc; 2545 int i, j; 2546 2547 ice_for_each_xdp_txq(vsi, i) { 2548 u16 xdp_q_idx = vsi->alloc_txq + i; 2549 struct ice_tx_ring *xdp_ring; 2550 2551 xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL); 2552 2553 if (!xdp_ring) 2554 goto 
free_xdp_rings; 2555 2556 xdp_ring->q_index = xdp_q_idx; 2557 xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx]; 2558 xdp_ring->vsi = vsi; 2559 xdp_ring->netdev = NULL; 2560 xdp_ring->dev = dev; 2561 xdp_ring->count = vsi->num_tx_desc; 2562 xdp_ring->next_dd = ICE_RING_QUARTER(xdp_ring) - 1; 2563 xdp_ring->next_rs = ICE_RING_QUARTER(xdp_ring) - 1; 2564 WRITE_ONCE(vsi->xdp_rings[i], xdp_ring); 2565 if (ice_setup_tx_ring(xdp_ring)) 2566 goto free_xdp_rings; 2567 ice_set_ring_xdp(xdp_ring); 2568 xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); 2569 spin_lock_init(&xdp_ring->tx_lock); 2570 for (j = 0; j < xdp_ring->count; j++) { 2571 tx_desc = ICE_TX_DESC(xdp_ring, j); 2572 tx_desc->cmd_type_offset_bsz = cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE); 2573 } 2574 } 2575 2576 ice_for_each_rxq(vsi, i) { 2577 if (static_key_enabled(&ice_xdp_locking_key)) 2578 vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq]; 2579 else 2580 vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i]; 2581 } 2582 2583 return 0; 2584 2585 free_xdp_rings: 2586 for (; i >= 0; i--) 2587 if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) 2588 ice_free_tx_ring(vsi->xdp_rings[i]); 2589 return -ENOMEM; 2590 } 2591 2592 /** 2593 * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI 2594 * @vsi: VSI to set the bpf prog on 2595 * @prog: the bpf prog pointer 2596 */ 2597 static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog) 2598 { 2599 struct bpf_prog *old_prog; 2600 int i; 2601 2602 old_prog = xchg(&vsi->xdp_prog, prog); 2603 if (old_prog) 2604 bpf_prog_put(old_prog); 2605 2606 ice_for_each_rxq(vsi, i) 2607 WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); 2608 } 2609 2610 /** 2611 * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP 2612 * @vsi: VSI to bring up Tx rings used by XDP 2613 * @prog: bpf program that will be assigned to VSI 2614 * 2615 * Return 0 on success and negative value on error 2616 */ 2617 int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) 2618 { 2619 u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; 2620 int xdp_rings_rem = vsi->num_xdp_txq; 2621 struct ice_pf *pf = vsi->back; 2622 struct ice_qs_cfg xdp_qs_cfg = { 2623 .qs_mutex = &pf->avail_q_mutex, 2624 .pf_map = pf->avail_txqs, 2625 .pf_map_size = pf->max_pf_txqs, 2626 .q_count = vsi->num_xdp_txq, 2627 .scatter_count = ICE_MAX_SCATTER_TXQS, 2628 .vsi_map = vsi->txq_map, 2629 .vsi_map_offset = vsi->alloc_txq, 2630 .mapping_mode = ICE_VSI_MAP_CONTIG 2631 }; 2632 struct device *dev; 2633 int i, v_idx; 2634 int status; 2635 2636 dev = ice_pf_to_dev(pf); 2637 vsi->xdp_rings = devm_kcalloc(dev, vsi->num_xdp_txq, 2638 sizeof(*vsi->xdp_rings), GFP_KERNEL); 2639 if (!vsi->xdp_rings) 2640 return -ENOMEM; 2641 2642 vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode; 2643 if (__ice_vsi_get_qs(&xdp_qs_cfg)) 2644 goto err_map_xdp; 2645 2646 if (static_key_enabled(&ice_xdp_locking_key)) 2647 netdev_warn(vsi->netdev, 2648 "Could not allocate one XDP Tx ring per CPU, XDP_TX/XDP_REDIRECT actions will be slower\n"); 2649 2650 if (ice_xdp_alloc_setup_rings(vsi)) 2651 goto clear_xdp_rings; 2652 2653 /* follow the logic from ice_vsi_map_rings_to_vectors */ 2654 ice_for_each_q_vector(vsi, v_idx) { 2655 struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; 2656 int xdp_rings_per_v, q_id, q_base; 2657 2658 xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem, 2659 vsi->num_q_vectors - v_idx); 2660 q_base = vsi->num_xdp_txq - xdp_rings_rem; 2661 2662 for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) { 2663 
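/* Round-robin the remaining XDP Tx rings over the remaining q_vectors:
 * the DIV_ROUND_UP() above gives each vector its share and guarantees
 * every ring gets claimed (for instance, 8 rings spread over 3 vectors
 * come out as 3/3/2). Each ring is pushed onto the head of the
 * q_vector's Tx ring list so it is serviced by that vector's NAPI poll.
 */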
struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id]; 2664 2665 xdp_ring->q_vector = q_vector; 2666 xdp_ring->next = q_vector->tx.tx_ring; 2667 q_vector->tx.tx_ring = xdp_ring; 2668 } 2669 xdp_rings_rem -= xdp_rings_per_v; 2670 } 2671 2672 /* omit the scheduler update if in reset path; XDP queues will be 2673 * taken into account at the end of ice_vsi_rebuild, where 2674 * ice_cfg_vsi_lan is being called 2675 */ 2676 if (ice_is_reset_in_progress(pf->state)) 2677 return 0; 2678 2679 /* tell the Tx scheduler that right now we have 2680 * additional queues 2681 */ 2682 for (i = 0; i < vsi->tc_cfg.numtc; i++) 2683 max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq; 2684 2685 status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, 2686 max_txqs); 2687 if (status) { 2688 dev_err(dev, "Failed VSI LAN queue config for XDP, error: %d\n", 2689 status); 2690 goto clear_xdp_rings; 2691 } 2692 2693 /* assign the prog only when it's not already present on VSI; 2694 * this flow is a subject of both ethtool -L and ndo_bpf flows; 2695 * VSI rebuild that happens under ethtool -L can expose us to 2696 * the bpf_prog refcount issues as we would be swapping same 2697 * bpf_prog pointers from vsi->xdp_prog and calling bpf_prog_put 2698 * on it as it would be treated as an 'old_prog'; for ndo_bpf 2699 * this is not harmful as dev_xdp_install bumps the refcount 2700 * before calling the op exposed by the driver; 2701 */ 2702 if (!ice_is_xdp_ena_vsi(vsi)) 2703 ice_vsi_assign_bpf_prog(vsi, prog); 2704 2705 return 0; 2706 clear_xdp_rings: 2707 ice_for_each_xdp_txq(vsi, i) 2708 if (vsi->xdp_rings[i]) { 2709 kfree_rcu(vsi->xdp_rings[i], rcu); 2710 vsi->xdp_rings[i] = NULL; 2711 } 2712 2713 err_map_xdp: 2714 mutex_lock(&pf->avail_q_mutex); 2715 ice_for_each_xdp_txq(vsi, i) { 2716 clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs); 2717 vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX; 2718 } 2719 mutex_unlock(&pf->avail_q_mutex); 2720 2721 devm_kfree(dev, vsi->xdp_rings); 2722 return -ENOMEM; 2723 } 2724 2725 /** 2726 * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings 2727 * @vsi: VSI to remove XDP rings 2728 * 2729 * Detach XDP rings from irq vectors, clean up the PF bitmap and free 2730 * resources 2731 */ 2732 int ice_destroy_xdp_rings(struct ice_vsi *vsi) 2733 { 2734 u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; 2735 struct ice_pf *pf = vsi->back; 2736 int i, v_idx; 2737 2738 /* q_vectors are freed in reset path so there's no point in detaching 2739 * rings; in case of rebuild being triggered not from reset bits 2740 * in pf->state won't be set, so additionally check first q_vector 2741 * against NULL 2742 */ 2743 if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0]) 2744 goto free_qmap; 2745 2746 ice_for_each_q_vector(vsi, v_idx) { 2747 struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; 2748 struct ice_tx_ring *ring; 2749 2750 ice_for_each_tx_ring(ring, q_vector->tx) 2751 if (!ring->tx_buf || !ice_ring_is_xdp(ring)) 2752 break; 2753 2754 /* restore the value of last node prior to XDP setup */ 2755 q_vector->tx.tx_ring = ring; 2756 } 2757 2758 free_qmap: 2759 mutex_lock(&pf->avail_q_mutex); 2760 ice_for_each_xdp_txq(vsi, i) { 2761 clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs); 2762 vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX; 2763 } 2764 mutex_unlock(&pf->avail_q_mutex); 2765 2766 ice_for_each_xdp_txq(vsi, i) 2767 if (vsi->xdp_rings[i]) { 2768 if (vsi->xdp_rings[i]->desc) 2769 ice_free_tx_ring(vsi->xdp_rings[i]); 2770 
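/* The descriptor memory was released above when it had been allocated;
 * the ring structure itself is freed with kfree_rcu() so a datapath
 * reader still holding the pointer under RCU never touches freed
 * memory, and the VSI's ring slot is cleared right after.
 */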
kfree_rcu(vsi->xdp_rings[i], rcu); 2771 vsi->xdp_rings[i] = NULL; 2772 } 2773 2774 devm_kfree(ice_pf_to_dev(pf), vsi->xdp_rings); 2775 vsi->xdp_rings = NULL; 2776 2777 if (static_key_enabled(&ice_xdp_locking_key)) 2778 static_branch_dec(&ice_xdp_locking_key); 2779 2780 if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0]) 2781 return 0; 2782 2783 ice_vsi_assign_bpf_prog(vsi, NULL); 2784 2785 /* notify Tx scheduler that we destroyed XDP queues and bring 2786 * back the old number of child nodes 2787 */ 2788 for (i = 0; i < vsi->tc_cfg.numtc; i++) 2789 max_txqs[i] = vsi->num_txq; 2790 2791 /* change number of XDP Tx queues to 0 */ 2792 vsi->num_xdp_txq = 0; 2793 2794 return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, 2795 max_txqs); 2796 } 2797 2798 /** 2799 * ice_vsi_rx_napi_schedule - Schedule napi on RX queues from VSI 2800 * @vsi: VSI to schedule napi on 2801 */ 2802 static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi) 2803 { 2804 int i; 2805 2806 ice_for_each_rxq(vsi, i) { 2807 struct ice_rx_ring *rx_ring = vsi->rx_rings[i]; 2808 2809 if (rx_ring->xsk_pool) 2810 napi_schedule(&rx_ring->q_vector->napi); 2811 } 2812 } 2813 2814 /** 2815 * ice_vsi_determine_xdp_res - figure out how many Tx qs can XDP have 2816 * @vsi: VSI to determine the count of XDP Tx qs 2817 * 2818 * returns 0 if Tx qs count is higher than at least half of CPU count, 2819 * -ENOMEM otherwise 2820 */ 2821 int ice_vsi_determine_xdp_res(struct ice_vsi *vsi) 2822 { 2823 u16 avail = ice_get_avail_txq_count(vsi->back); 2824 u16 cpus = num_possible_cpus(); 2825 2826 if (avail < cpus / 2) 2827 return -ENOMEM; 2828 2829 vsi->num_xdp_txq = min_t(u16, avail, cpus); 2830 2831 if (vsi->num_xdp_txq < cpus) 2832 static_branch_inc(&ice_xdp_locking_key); 2833 2834 return 0; 2835 } 2836 2837 /** 2838 * ice_xdp_setup_prog - Add or remove XDP eBPF program 2839 * @vsi: VSI to setup XDP for 2840 * @prog: XDP program 2841 * @extack: netlink extended ack 2842 */ 2843 static int 2844 ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, 2845 struct netlink_ext_ack *extack) 2846 { 2847 int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD; 2848 bool if_running = netif_running(vsi->netdev); 2849 int ret = 0, xdp_ring_err = 0; 2850 2851 if (frame_size > vsi->rx_buf_len) { 2852 NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP"); 2853 return -EOPNOTSUPP; 2854 } 2855 2856 /* need to stop netdev while setting up the program for Rx rings */ 2857 if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { 2858 ret = ice_down(vsi); 2859 if (ret) { 2860 NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed"); 2861 return ret; 2862 } 2863 } 2864 2865 if (!ice_is_xdp_ena_vsi(vsi) && prog) { 2866 xdp_ring_err = ice_vsi_determine_xdp_res(vsi); 2867 if (xdp_ring_err) { 2868 NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP"); 2869 } else { 2870 xdp_ring_err = ice_prepare_xdp_rings(vsi, prog); 2871 if (xdp_ring_err) 2872 NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); 2873 } 2874 } else if (ice_is_xdp_ena_vsi(vsi) && !prog) { 2875 xdp_ring_err = ice_destroy_xdp_rings(vsi); 2876 if (xdp_ring_err) 2877 NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed"); 2878 } else { 2879 /* safe to call even when prog == vsi->xdp_prog as 2880 * dev_xdp_install in net/core/dev.c incremented prog's 2881 * refcount so corresponding bpf_prog_put won't cause 2882 * underflow 2883 */ 2884 ice_vsi_assign_bpf_prog(vsi, prog); 2885 } 2886 2887 if (if_running) 2888 ret = 
ice_up(vsi); 2889 2890 if (!ret && prog) 2891 ice_vsi_rx_napi_schedule(vsi); 2892 2893 return (ret || xdp_ring_err) ? -ENOMEM : 0; 2894 } 2895 2896 /** 2897 * ice_xdp_safe_mode - XDP handler for safe mode 2898 * @dev: netdevice 2899 * @xdp: XDP command 2900 */ 2901 static int ice_xdp_safe_mode(struct net_device __always_unused *dev, 2902 struct netdev_bpf *xdp) 2903 { 2904 NL_SET_ERR_MSG_MOD(xdp->extack, 2905 "Please provide working DDP firmware package in order to use XDP\n" 2906 "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst"); 2907 return -EOPNOTSUPP; 2908 } 2909 2910 /** 2911 * ice_xdp - implements XDP handler 2912 * @dev: netdevice 2913 * @xdp: XDP command 2914 */ 2915 static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp) 2916 { 2917 struct ice_netdev_priv *np = netdev_priv(dev); 2918 struct ice_vsi *vsi = np->vsi; 2919 2920 if (vsi->type != ICE_VSI_PF) { 2921 NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF VSI"); 2922 return -EINVAL; 2923 } 2924 2925 switch (xdp->command) { 2926 case XDP_SETUP_PROG: 2927 return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack); 2928 case XDP_SETUP_XSK_POOL: 2929 return ice_xsk_pool_setup(vsi, xdp->xsk.pool, 2930 xdp->xsk.queue_id); 2931 default: 2932 return -EINVAL; 2933 } 2934 } 2935 2936 /** 2937 * ice_ena_misc_vector - enable the non-queue interrupts 2938 * @pf: board private structure 2939 */ 2940 static void ice_ena_misc_vector(struct ice_pf *pf) 2941 { 2942 struct ice_hw *hw = &pf->hw; 2943 u32 val; 2944 2945 /* Disable anti-spoof detection interrupt to prevent spurious event 2946 * interrupts during a function reset. Anti-spoof functionally is 2947 * still supported. 2948 */ 2949 val = rd32(hw, GL_MDCK_TX_TDPU); 2950 val |= GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M; 2951 wr32(hw, GL_MDCK_TX_TDPU, val); 2952 2953 /* clear things first */ 2954 wr32(hw, PFINT_OICR_ENA, 0); /* disable all */ 2955 rd32(hw, PFINT_OICR); /* read to clear */ 2956 2957 val = (PFINT_OICR_ECC_ERR_M | 2958 PFINT_OICR_MAL_DETECT_M | 2959 PFINT_OICR_GRST_M | 2960 PFINT_OICR_PCI_EXCEPTION_M | 2961 PFINT_OICR_VFLR_M | 2962 PFINT_OICR_HMC_ERR_M | 2963 PFINT_OICR_PE_PUSH_M | 2964 PFINT_OICR_PE_CRITERR_M); 2965 2966 wr32(hw, PFINT_OICR_ENA, val); 2967 2968 /* SW_ITR_IDX = 0, but don't change INTENA */ 2969 wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), 2970 GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M); 2971 } 2972 2973 /** 2974 * ice_misc_intr - misc interrupt handler 2975 * @irq: interrupt number 2976 * @data: pointer to a q_vector 2977 */ 2978 static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) 2979 { 2980 struct ice_pf *pf = (struct ice_pf *)data; 2981 struct ice_hw *hw = &pf->hw; 2982 irqreturn_t ret = IRQ_NONE; 2983 struct device *dev; 2984 u32 oicr, ena_mask; 2985 2986 dev = ice_pf_to_dev(pf); 2987 set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state); 2988 set_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state); 2989 set_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state); 2990 2991 oicr = rd32(hw, PFINT_OICR); 2992 ena_mask = rd32(hw, PFINT_OICR_ENA); 2993 2994 if (oicr & PFINT_OICR_SWINT_M) { 2995 ena_mask &= ~PFINT_OICR_SWINT_M; 2996 pf->sw_int_count++; 2997 } 2998 2999 if (oicr & PFINT_OICR_MAL_DETECT_M) { 3000 ena_mask &= ~PFINT_OICR_MAL_DETECT_M; 3001 set_bit(ICE_MDD_EVENT_PENDING, pf->state); 3002 } 3003 if (oicr & PFINT_OICR_VFLR_M) { 3004 /* disable any further VFLR event notifications */ 3005 if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) { 3006 u32 reg = rd32(hw, PFINT_OICR_ENA); 3007 3008 reg &= 
~PFINT_OICR_VFLR_M; 3009 wr32(hw, PFINT_OICR_ENA, reg); 3010 } else { 3011 ena_mask &= ~PFINT_OICR_VFLR_M; 3012 set_bit(ICE_VFLR_EVENT_PENDING, pf->state); 3013 } 3014 } 3015 3016 if (oicr & PFINT_OICR_GRST_M) { 3017 u32 reset; 3018 3019 /* we have a reset warning */ 3020 ena_mask &= ~PFINT_OICR_GRST_M; 3021 reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >> 3022 GLGEN_RSTAT_RESET_TYPE_S; 3023 3024 if (reset == ICE_RESET_CORER) 3025 pf->corer_count++; 3026 else if (reset == ICE_RESET_GLOBR) 3027 pf->globr_count++; 3028 else if (reset == ICE_RESET_EMPR) 3029 pf->empr_count++; 3030 else 3031 dev_dbg(dev, "Invalid reset type %d\n", reset); 3032 3033 /* If a reset cycle isn't already in progress, we set a bit in 3034 * pf->state so that the service task can start a reset/rebuild. 3035 */ 3036 if (!test_and_set_bit(ICE_RESET_OICR_RECV, pf->state)) { 3037 if (reset == ICE_RESET_CORER) 3038 set_bit(ICE_CORER_RECV, pf->state); 3039 else if (reset == ICE_RESET_GLOBR) 3040 set_bit(ICE_GLOBR_RECV, pf->state); 3041 else 3042 set_bit(ICE_EMPR_RECV, pf->state); 3043 3044 /* There are couple of different bits at play here. 3045 * hw->reset_ongoing indicates whether the hardware is 3046 * in reset. This is set to true when a reset interrupt 3047 * is received and set back to false after the driver 3048 * has determined that the hardware is out of reset. 3049 * 3050 * ICE_RESET_OICR_RECV in pf->state indicates 3051 * that a post reset rebuild is required before the 3052 * driver is operational again. This is set above. 3053 * 3054 * As this is the start of the reset/rebuild cycle, set 3055 * both to indicate that. 3056 */ 3057 hw->reset_ongoing = true; 3058 } 3059 } 3060 3061 if (oicr & PFINT_OICR_TSYN_TX_M) { 3062 ena_mask &= ~PFINT_OICR_TSYN_TX_M; 3063 ice_ptp_process_ts(pf); 3064 } 3065 3066 if (oicr & PFINT_OICR_TSYN_EVNT_M) { 3067 u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; 3068 u32 gltsyn_stat = rd32(hw, GLTSYN_STAT(tmr_idx)); 3069 3070 /* Save EVENTs from GTSYN register */ 3071 pf->ptp.ext_ts_irq |= gltsyn_stat & (GLTSYN_STAT_EVENT0_M | 3072 GLTSYN_STAT_EVENT1_M | 3073 GLTSYN_STAT_EVENT2_M); 3074 ena_mask &= ~PFINT_OICR_TSYN_EVNT_M; 3075 kthread_queue_work(pf->ptp.kworker, &pf->ptp.extts_work); 3076 } 3077 3078 #define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M) 3079 if (oicr & ICE_AUX_CRIT_ERR) { 3080 pf->oicr_err_reg |= oicr; 3081 set_bit(ICE_AUX_ERR_PENDING, pf->state); 3082 ena_mask &= ~ICE_AUX_CRIT_ERR; 3083 } 3084 3085 /* Report any remaining unexpected interrupts */ 3086 oicr &= ena_mask; 3087 if (oicr) { 3088 dev_dbg(dev, "unhandled interrupt oicr=0x%08x\n", oicr); 3089 /* If a critical error is pending there is no choice but to 3090 * reset the device. 
3091 */ 3092 if (oicr & (PFINT_OICR_PCI_EXCEPTION_M | 3093 PFINT_OICR_ECC_ERR_M)) { 3094 set_bit(ICE_PFR_REQ, pf->state); 3095 ice_service_task_schedule(pf); 3096 } 3097 } 3098 ret = IRQ_HANDLED; 3099 3100 ice_service_task_schedule(pf); 3101 ice_irq_dynamic_ena(hw, NULL, NULL); 3102 3103 return ret; 3104 } 3105 3106 /** 3107 * ice_dis_ctrlq_interrupts - disable control queue interrupts 3108 * @hw: pointer to HW structure 3109 */ 3110 static void ice_dis_ctrlq_interrupts(struct ice_hw *hw) 3111 { 3112 /* disable Admin queue Interrupt causes */ 3113 wr32(hw, PFINT_FW_CTL, 3114 rd32(hw, PFINT_FW_CTL) & ~PFINT_FW_CTL_CAUSE_ENA_M); 3115 3116 /* disable Mailbox queue Interrupt causes */ 3117 wr32(hw, PFINT_MBX_CTL, 3118 rd32(hw, PFINT_MBX_CTL) & ~PFINT_MBX_CTL_CAUSE_ENA_M); 3119 3120 wr32(hw, PFINT_SB_CTL, 3121 rd32(hw, PFINT_SB_CTL) & ~PFINT_SB_CTL_CAUSE_ENA_M); 3122 3123 /* disable Control queue Interrupt causes */ 3124 wr32(hw, PFINT_OICR_CTL, 3125 rd32(hw, PFINT_OICR_CTL) & ~PFINT_OICR_CTL_CAUSE_ENA_M); 3126 3127 ice_flush(hw); 3128 } 3129 3130 /** 3131 * ice_free_irq_msix_misc - Unroll misc vector setup 3132 * @pf: board private structure 3133 */ 3134 static void ice_free_irq_msix_misc(struct ice_pf *pf) 3135 { 3136 struct ice_hw *hw = &pf->hw; 3137 3138 ice_dis_ctrlq_interrupts(hw); 3139 3140 /* disable OICR interrupt */ 3141 wr32(hw, PFINT_OICR_ENA, 0); 3142 ice_flush(hw); 3143 3144 if (pf->msix_entries) { 3145 synchronize_irq(pf->msix_entries[pf->oicr_idx].vector); 3146 devm_free_irq(ice_pf_to_dev(pf), 3147 pf->msix_entries[pf->oicr_idx].vector, pf); 3148 } 3149 3150 pf->num_avail_sw_msix += 1; 3151 ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID); 3152 } 3153 3154 /** 3155 * ice_ena_ctrlq_interrupts - enable control queue interrupts 3156 * @hw: pointer to HW structure 3157 * @reg_idx: HW vector index to associate the control queue interrupts with 3158 */ 3159 static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx) 3160 { 3161 u32 val; 3162 3163 val = ((reg_idx & PFINT_OICR_CTL_MSIX_INDX_M) | 3164 PFINT_OICR_CTL_CAUSE_ENA_M); 3165 wr32(hw, PFINT_OICR_CTL, val); 3166 3167 /* enable Admin queue Interrupt causes */ 3168 val = ((reg_idx & PFINT_FW_CTL_MSIX_INDX_M) | 3169 PFINT_FW_CTL_CAUSE_ENA_M); 3170 wr32(hw, PFINT_FW_CTL, val); 3171 3172 /* enable Mailbox queue Interrupt causes */ 3173 val = ((reg_idx & PFINT_MBX_CTL_MSIX_INDX_M) | 3174 PFINT_MBX_CTL_CAUSE_ENA_M); 3175 wr32(hw, PFINT_MBX_CTL, val); 3176 3177 /* This enables Sideband queue Interrupt causes */ 3178 val = ((reg_idx & PFINT_SB_CTL_MSIX_INDX_M) | 3179 PFINT_SB_CTL_CAUSE_ENA_M); 3180 wr32(hw, PFINT_SB_CTL, val); 3181 3182 ice_flush(hw); 3183 } 3184 3185 /** 3186 * ice_req_irq_msix_misc - Setup the misc vector to handle non queue events 3187 * @pf: board private structure 3188 * 3189 * This sets up the handler for MSIX 0, which is used to manage the 3190 * non-queue interrupts, e.g. AdminQ and errors. This is not used 3191 * when in MSI or Legacy interrupt mode. 3192 */ 3193 static int ice_req_irq_msix_misc(struct ice_pf *pf) 3194 { 3195 struct device *dev = ice_pf_to_dev(pf); 3196 struct ice_hw *hw = &pf->hw; 3197 int oicr_idx, err = 0; 3198 3199 if (!pf->int_name[0]) 3200 snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc", 3201 dev_driver_string(dev), dev_name(dev)); 3202 3203 /* Do not request IRQ but do enable OICR interrupt since settings are 3204 * lost during reset. Note that this function is called only during 3205 * rebuild path and not while reset is in progress. 
3206 */ 3207 if (ice_is_reset_in_progress(pf->state)) 3208 goto skip_req_irq; 3209 3210 /* reserve one vector in irq_tracker for misc interrupts */ 3211 oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); 3212 if (oicr_idx < 0) 3213 return oicr_idx; 3214 3215 pf->num_avail_sw_msix -= 1; 3216 pf->oicr_idx = (u16)oicr_idx; 3217 3218 err = devm_request_irq(dev, pf->msix_entries[pf->oicr_idx].vector, 3219 ice_misc_intr, 0, pf->int_name, pf); 3220 if (err) { 3221 dev_err(dev, "devm_request_irq for %s failed: %d\n", 3222 pf->int_name, err); 3223 ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); 3224 pf->num_avail_sw_msix += 1; 3225 return err; 3226 } 3227 3228 skip_req_irq: 3229 ice_ena_misc_vector(pf); 3230 3231 ice_ena_ctrlq_interrupts(hw, pf->oicr_idx); 3232 wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx), 3233 ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S); 3234 3235 ice_flush(hw); 3236 ice_irq_dynamic_ena(hw, NULL, NULL); 3237 3238 return 0; 3239 } 3240 3241 /** 3242 * ice_napi_add - register NAPI handler for the VSI 3243 * @vsi: VSI for which NAPI handler is to be registered 3244 * 3245 * This function is only called in the driver's load path. Registering the NAPI 3246 * handler is done in ice_vsi_alloc_q_vector() for all other cases (i.e. resume, 3247 * reset/rebuild, etc.) 3248 */ 3249 static void ice_napi_add(struct ice_vsi *vsi) 3250 { 3251 int v_idx; 3252 3253 if (!vsi->netdev) 3254 return; 3255 3256 ice_for_each_q_vector(vsi, v_idx) 3257 netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi, 3258 ice_napi_poll, NAPI_POLL_WEIGHT); 3259 } 3260 3261 /** 3262 * ice_set_ops - set netdev and ethtools ops for the given netdev 3263 * @netdev: netdev instance 3264 */ 3265 static void ice_set_ops(struct net_device *netdev) 3266 { 3267 struct ice_pf *pf = ice_netdev_to_pf(netdev); 3268 3269 if (ice_is_safe_mode(pf)) { 3270 netdev->netdev_ops = &ice_netdev_safe_mode_ops; 3271 ice_set_ethtool_safe_mode_ops(netdev); 3272 return; 3273 } 3274 3275 netdev->netdev_ops = &ice_netdev_ops; 3276 netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic; 3277 ice_set_ethtool_ops(netdev); 3278 } 3279 3280 /** 3281 * ice_set_netdev_features - set features for the given netdev 3282 * @netdev: netdev instance 3283 */ 3284 static void ice_set_netdev_features(struct net_device *netdev) 3285 { 3286 struct ice_pf *pf = ice_netdev_to_pf(netdev); 3287 bool is_dvm_ena = ice_is_dvm_ena(&pf->hw); 3288 netdev_features_t csumo_features; 3289 netdev_features_t vlano_features; 3290 netdev_features_t dflt_features; 3291 netdev_features_t tso_features; 3292 3293 if (ice_is_safe_mode(pf)) { 3294 /* safe mode */ 3295 netdev->features = NETIF_F_SG | NETIF_F_HIGHDMA; 3296 netdev->hw_features = netdev->features; 3297 return; 3298 } 3299 3300 dflt_features = NETIF_F_SG | 3301 NETIF_F_HIGHDMA | 3302 NETIF_F_NTUPLE | 3303 NETIF_F_RXHASH; 3304 3305 csumo_features = NETIF_F_RXCSUM | 3306 NETIF_F_IP_CSUM | 3307 NETIF_F_SCTP_CRC | 3308 NETIF_F_IPV6_CSUM; 3309 3310 vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER | 3311 NETIF_F_HW_VLAN_CTAG_TX | 3312 NETIF_F_HW_VLAN_CTAG_RX; 3313 3314 /* Enable CTAG/STAG filtering by default in Double VLAN Mode (DVM) */ 3315 if (is_dvm_ena) 3316 vlano_features |= NETIF_F_HW_VLAN_STAG_FILTER; 3317 3318 tso_features = NETIF_F_TSO | 3319 NETIF_F_TSO_ECN | 3320 NETIF_F_TSO6 | 3321 NETIF_F_GSO_GRE | 3322 NETIF_F_GSO_UDP_TUNNEL | 3323 NETIF_F_GSO_GRE_CSUM | 3324 NETIF_F_GSO_UDP_TUNNEL_CSUM | 3325 NETIF_F_GSO_PARTIAL | 3326 NETIF_F_GSO_IPXIP4 | 3327 NETIF_F_GSO_IPXIP6 | 3328 NETIF_F_GSO_UDP_L4; 3329 
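/* The tunnel checksum offloads listed in tso_features are advertised
 * through gso_partial_features below, and the default, checksum, VLAN
 * and TSO groups built above are OR'ed together to form hw_features,
 * i.e. the set of features the user is allowed to toggle.
 */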
3330 netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM | 3331 NETIF_F_GSO_GRE_CSUM; 3332 /* set features that user can change */ 3333 netdev->hw_features = dflt_features | csumo_features | 3334 vlano_features | tso_features; 3335 3336 /* add support for HW_CSUM on packets with MPLS header */ 3337 netdev->mpls_features = NETIF_F_HW_CSUM; 3338 3339 /* enable features */ 3340 netdev->features |= netdev->hw_features; 3341 3342 netdev->hw_features |= NETIF_F_HW_TC; 3343 3344 /* encap and VLAN devices inherit default, csumo and tso features */ 3345 netdev->hw_enc_features |= dflt_features | csumo_features | 3346 tso_features; 3347 netdev->vlan_features |= dflt_features | csumo_features | 3348 tso_features; 3349 3350 /* advertise support but don't enable by default since only one type of 3351 * VLAN offload can be enabled at a time (i.e. CTAG or STAG). When one 3352 * type turns on the other has to be turned off. This is enforced by the 3353 * ice_fix_features() ndo callback. 3354 */ 3355 if (is_dvm_ena) 3356 netdev->hw_features |= NETIF_F_HW_VLAN_STAG_RX | 3357 NETIF_F_HW_VLAN_STAG_TX; 3358 } 3359 3360 /** 3361 * ice_cfg_netdev - Allocate, configure and register a netdev 3362 * @vsi: the VSI associated with the new netdev 3363 * 3364 * Returns 0 on success, negative value on failure 3365 */ 3366 static int ice_cfg_netdev(struct ice_vsi *vsi) 3367 { 3368 struct ice_netdev_priv *np; 3369 struct net_device *netdev; 3370 u8 mac_addr[ETH_ALEN]; 3371 3372 netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq, 3373 vsi->alloc_rxq); 3374 if (!netdev) 3375 return -ENOMEM; 3376 3377 set_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); 3378 vsi->netdev = netdev; 3379 np = netdev_priv(netdev); 3380 np->vsi = vsi; 3381 3382 ice_set_netdev_features(netdev); 3383 3384 ice_set_ops(netdev); 3385 3386 if (vsi->type == ICE_VSI_PF) { 3387 SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back)); 3388 ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr); 3389 eth_hw_addr_set(netdev, mac_addr); 3390 ether_addr_copy(netdev->perm_addr, mac_addr); 3391 } 3392 3393 netdev->priv_flags |= IFF_UNICAST_FLT; 3394 3395 /* Setup netdev TC information */ 3396 ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc); 3397 3398 /* setup watchdog timeout value to be 5 second */ 3399 netdev->watchdog_timeo = 5 * HZ; 3400 3401 netdev->min_mtu = ETH_MIN_MTU; 3402 netdev->max_mtu = ICE_MAX_MTU; 3403 3404 return 0; 3405 } 3406 3407 /** 3408 * ice_fill_rss_lut - Fill the RSS lookup table with default values 3409 * @lut: Lookup table 3410 * @rss_table_size: Lookup table size 3411 * @rss_size: Range of queue number for hashing 3412 */ 3413 void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) 3414 { 3415 u16 i; 3416 3417 for (i = 0; i < rss_table_size; i++) 3418 lut[i] = i % rss_size; 3419 } 3420 3421 /** 3422 * ice_pf_vsi_setup - Set up a PF VSI 3423 * @pf: board private structure 3424 * @pi: pointer to the port_info instance 3425 * 3426 * Returns pointer to the successfully allocated VSI software struct 3427 * on success, otherwise returns NULL on failure. 
3428 */ 3429 static struct ice_vsi * 3430 ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) 3431 { 3432 return ice_vsi_setup(pf, pi, ICE_VSI_PF, NULL, NULL); 3433 } 3434 3435 static struct ice_vsi * 3436 ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, 3437 struct ice_channel *ch) 3438 { 3439 return ice_vsi_setup(pf, pi, ICE_VSI_CHNL, NULL, ch); 3440 } 3441 3442 /** 3443 * ice_ctrl_vsi_setup - Set up a control VSI 3444 * @pf: board private structure 3445 * @pi: pointer to the port_info instance 3446 * 3447 * Returns pointer to the successfully allocated VSI software struct 3448 * on success, otherwise returns NULL on failure. 3449 */ 3450 static struct ice_vsi * 3451 ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) 3452 { 3453 return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, NULL, NULL); 3454 } 3455 3456 /** 3457 * ice_lb_vsi_setup - Set up a loopback VSI 3458 * @pf: board private structure 3459 * @pi: pointer to the port_info instance 3460 * 3461 * Returns pointer to the successfully allocated VSI software struct 3462 * on success, otherwise returns NULL on failure. 3463 */ 3464 struct ice_vsi * 3465 ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) 3466 { 3467 return ice_vsi_setup(pf, pi, ICE_VSI_LB, NULL, NULL); 3468 } 3469 3470 /** 3471 * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload 3472 * @netdev: network interface to be adjusted 3473 * @proto: VLAN TPID 3474 * @vid: VLAN ID to be added 3475 * 3476 * net_device_ops implementation for adding VLAN IDs 3477 */ 3478 static int 3479 ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) 3480 { 3481 struct ice_netdev_priv *np = netdev_priv(netdev); 3482 struct ice_vsi_vlan_ops *vlan_ops; 3483 struct ice_vsi *vsi = np->vsi; 3484 struct ice_vlan vlan; 3485 int ret; 3486 3487 /* VLAN 0 is added by default during load/reset */ 3488 if (!vid) 3489 return 0; 3490 3491 vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); 3492 3493 /* Add a switch rule for this VLAN ID so its corresponding VLAN tagged 3494 * packets aren't pruned by the device's internal switch on Rx 3495 */ 3496 vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0); 3497 ret = vlan_ops->add_vlan(vsi, &vlan); 3498 if (!ret) 3499 set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); 3500 3501 return ret; 3502 } 3503 3504 /** 3505 * ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload 3506 * @netdev: network interface to be adjusted 3507 * @proto: VLAN TPID 3508 * @vid: VLAN ID to be removed 3509 * 3510 * net_device_ops implementation for removing VLAN IDs 3511 */ 3512 static int 3513 ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) 3514 { 3515 struct ice_netdev_priv *np = netdev_priv(netdev); 3516 struct ice_vsi_vlan_ops *vlan_ops; 3517 struct ice_vsi *vsi = np->vsi; 3518 struct ice_vlan vlan; 3519 int ret; 3520 3521 /* don't allow removal of VLAN 0 */ 3522 if (!vid) 3523 return 0; 3524 3525 vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); 3526 3527 /* Make sure VLAN delete is successful before updating VLAN 3528 * information 3529 */ 3530 vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0); 3531 ret = vlan_ops->del_vlan(vsi, &vlan); 3532 if (ret) 3533 return ret; 3534 3535 set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); 3536 return 0; 3537 } 3538 3539 /** 3540 * ice_rep_indr_tc_block_unbind 3541 * @cb_priv: indirection block private data 3542 */ 3543 static void ice_rep_indr_tc_block_unbind(void *cb_priv) 3544 { 3545 struct ice_indr_block_priv *indr_priv = cb_priv; 3546 3547 list_del(&indr_priv->list); 3548 
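/* The private entry was unlinked from the netdev's indirect block list
 * above, so freeing it below cannot leave a stale node behind for a
 * later cleanup pass to trip over.
 */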
kfree(indr_priv); 3549 } 3550 3551 /** 3552 * ice_tc_indir_block_unregister - Unregister TC indirect block notifications 3553 * @vsi: VSI struct which has the netdev 3554 */ 3555 static void ice_tc_indir_block_unregister(struct ice_vsi *vsi) 3556 { 3557 struct ice_netdev_priv *np = netdev_priv(vsi->netdev); 3558 3559 flow_indr_dev_unregister(ice_indr_setup_tc_cb, np, 3560 ice_rep_indr_tc_block_unbind); 3561 } 3562 3563 /** 3564 * ice_tc_indir_block_remove - clean indirect TC block notifications 3565 * @pf: PF structure 3566 */ 3567 static void ice_tc_indir_block_remove(struct ice_pf *pf) 3568 { 3569 struct ice_vsi *pf_vsi = ice_get_main_vsi(pf); 3570 3571 if (!pf_vsi) 3572 return; 3573 3574 ice_tc_indir_block_unregister(pf_vsi); 3575 } 3576 3577 /** 3578 * ice_tc_indir_block_register - Register TC indirect block notifications 3579 * @vsi: VSI struct which has the netdev 3580 * 3581 * Returns 0 on success, negative value on failure 3582 */ 3583 static int ice_tc_indir_block_register(struct ice_vsi *vsi) 3584 { 3585 struct ice_netdev_priv *np; 3586 3587 if (!vsi || !vsi->netdev) 3588 return -EINVAL; 3589 3590 np = netdev_priv(vsi->netdev); 3591 3592 INIT_LIST_HEAD(&np->tc_indr_block_priv_list); 3593 return flow_indr_dev_register(ice_indr_setup_tc_cb, np); 3594 } 3595 3596 /** 3597 * ice_setup_pf_sw - Setup the HW switch on startup or after reset 3598 * @pf: board private structure 3599 * 3600 * Returns 0 on success, negative value on failure 3601 */ 3602 static int ice_setup_pf_sw(struct ice_pf *pf) 3603 { 3604 struct device *dev = ice_pf_to_dev(pf); 3605 bool dvm = ice_is_dvm_ena(&pf->hw); 3606 struct ice_vsi *vsi; 3607 int status; 3608 3609 if (ice_is_reset_in_progress(pf->state)) 3610 return -EBUSY; 3611 3612 status = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL); 3613 if (status) 3614 return -EIO; 3615 3616 vsi = ice_pf_vsi_setup(pf, pf->hw.port_info); 3617 if (!vsi) 3618 return -ENOMEM; 3619 3620 /* init channel list */ 3621 INIT_LIST_HEAD(&vsi->ch_list); 3622 3623 status = ice_cfg_netdev(vsi); 3624 if (status) 3625 goto unroll_vsi_setup; 3626 /* netdev has to be configured before setting frame size */ 3627 ice_vsi_cfg_frame_size(vsi); 3628 3629 /* init indirect block notifications */ 3630 status = ice_tc_indir_block_register(vsi); 3631 if (status) { 3632 dev_err(dev, "Failed to register netdev notifier\n"); 3633 goto unroll_cfg_netdev; 3634 } 3635 3636 /* Setup DCB netlink interface */ 3637 ice_dcbnl_setup(vsi); 3638 3639 /* registering the NAPI handler requires both the queues and 3640 * netdev to be created, which are done in ice_pf_vsi_setup() 3641 * and ice_cfg_netdev() respectively 3642 */ 3643 ice_napi_add(vsi); 3644 3645 status = ice_set_cpu_rx_rmap(vsi); 3646 if (status) { 3647 dev_err(dev, "Failed to set CPU Rx map VSI %d error %d\n", 3648 vsi->vsi_num, status); 3649 goto unroll_napi_add; 3650 } 3651 status = ice_init_mac_fltr(pf); 3652 if (status) 3653 goto free_cpu_rx_map; 3654 3655 return 0; 3656 3657 free_cpu_rx_map: 3658 ice_free_cpu_rx_rmap(vsi); 3659 unroll_napi_add: 3660 ice_tc_indir_block_unregister(vsi); 3661 unroll_cfg_netdev: 3662 if (vsi) { 3663 ice_napi_del(vsi); 3664 if (vsi->netdev) { 3665 clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); 3666 free_netdev(vsi->netdev); 3667 vsi->netdev = NULL; 3668 } 3669 } 3670 3671 unroll_vsi_setup: 3672 ice_vsi_release(vsi); 3673 return status; 3674 } 3675 3676 /** 3677 * ice_get_avail_q_count - Get count of queues in use 3678 * @pf_qmap: bitmap to get queue use count from 3679 * @lock: pointer to a mutex that protects 
access to pf_qmap 3680 * @size: size of the bitmap 3681 */ 3682 static u16 3683 ice_get_avail_q_count(unsigned long *pf_qmap, struct mutex *lock, u16 size) 3684 { 3685 unsigned long bit; 3686 u16 count = 0; 3687 3688 mutex_lock(lock); 3689 for_each_clear_bit(bit, pf_qmap, size) 3690 count++; 3691 mutex_unlock(lock); 3692 3693 return count; 3694 } 3695 3696 /** 3697 * ice_get_avail_txq_count - Get count of Tx queues in use 3698 * @pf: pointer to an ice_pf instance 3699 */ 3700 u16 ice_get_avail_txq_count(struct ice_pf *pf) 3701 { 3702 return ice_get_avail_q_count(pf->avail_txqs, &pf->avail_q_mutex, 3703 pf->max_pf_txqs); 3704 } 3705 3706 /** 3707 * ice_get_avail_rxq_count - Get count of Rx queues in use 3708 * @pf: pointer to an ice_pf instance 3709 */ 3710 u16 ice_get_avail_rxq_count(struct ice_pf *pf) 3711 { 3712 return ice_get_avail_q_count(pf->avail_rxqs, &pf->avail_q_mutex, 3713 pf->max_pf_rxqs); 3714 } 3715 3716 /** 3717 * ice_deinit_pf - Unrolls initializations done by ice_init_pf 3718 * @pf: board private structure to initialize 3719 */ 3720 static void ice_deinit_pf(struct ice_pf *pf) 3721 { 3722 ice_service_task_stop(pf); 3723 mutex_destroy(&pf->sw_mutex); 3724 mutex_destroy(&pf->tc_mutex); 3725 mutex_destroy(&pf->avail_q_mutex); 3726 mutex_destroy(&pf->vfs.table_lock); 3727 3728 if (pf->avail_txqs) { 3729 bitmap_free(pf->avail_txqs); 3730 pf->avail_txqs = NULL; 3731 } 3732 3733 if (pf->avail_rxqs) { 3734 bitmap_free(pf->avail_rxqs); 3735 pf->avail_rxqs = NULL; 3736 } 3737 3738 if (pf->ptp.clock) 3739 ptp_clock_unregister(pf->ptp.clock); 3740 } 3741 3742 /** 3743 * ice_set_pf_caps - set PFs capability flags 3744 * @pf: pointer to the PF instance 3745 */ 3746 static void ice_set_pf_caps(struct ice_pf *pf) 3747 { 3748 struct ice_hw_func_caps *func_caps = &pf->hw.func_caps; 3749 3750 clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); 3751 if (func_caps->common_cap.rdma) 3752 set_bit(ICE_FLAG_RDMA_ENA, pf->flags); 3753 clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); 3754 if (func_caps->common_cap.dcb) 3755 set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); 3756 clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); 3757 if (func_caps->common_cap.sr_iov_1_1) { 3758 set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); 3759 pf->vfs.num_supported = min_t(int, func_caps->num_allocd_vfs, 3760 ICE_MAX_SRIOV_VFS); 3761 } 3762 clear_bit(ICE_FLAG_RSS_ENA, pf->flags); 3763 if (func_caps->common_cap.rss_table_size) 3764 set_bit(ICE_FLAG_RSS_ENA, pf->flags); 3765 3766 clear_bit(ICE_FLAG_FD_ENA, pf->flags); 3767 if (func_caps->fd_fltr_guar > 0 || func_caps->fd_fltr_best_effort > 0) { 3768 u16 unused; 3769 3770 /* ctrl_vsi_idx will be set to a valid value when flow director 3771 * is setup by ice_init_fdir 3772 */ 3773 pf->ctrl_vsi_idx = ICE_NO_VSI; 3774 set_bit(ICE_FLAG_FD_ENA, pf->flags); 3775 /* force guaranteed filter pool for PF */ 3776 ice_alloc_fd_guar_item(&pf->hw, &unused, 3777 func_caps->fd_fltr_guar); 3778 /* force shared filter pool for PF */ 3779 ice_alloc_fd_shrd_item(&pf->hw, &unused, 3780 func_caps->fd_fltr_best_effort); 3781 } 3782 3783 clear_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); 3784 if (func_caps->common_cap.ieee_1588) 3785 set_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); 3786 3787 pf->max_pf_txqs = func_caps->common_cap.num_txq; 3788 pf->max_pf_rxqs = func_caps->common_cap.num_rxq; 3789 } 3790 3791 /** 3792 * ice_init_pf - Initialize general software structures (struct ice_pf) 3793 * @pf: board private structure to initialize 3794 */ 3795 static int ice_init_pf(struct ice_pf *pf) 3796 { 3797 ice_set_pf_caps(pf); 3798 3799
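/* With the function capabilities cached just above, the remainder of
 * ice_init_pf() sets up software-only state: the switch and TC mutexes,
 * the AdminQ wait list/queue, the reset wait queue, the service timer
 * and work item, the Tx/Rx queue availability bitmaps sized from
 * max_pf_txqs/max_pf_rxqs, and the VF table with its lock.
 */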
mutex_init(&pf->sw_mutex); 3800 mutex_init(&pf->tc_mutex); 3801 3802 INIT_HLIST_HEAD(&pf->aq_wait_list); 3803 spin_lock_init(&pf->aq_wait_lock); 3804 init_waitqueue_head(&pf->aq_wait_queue); 3805 3806 init_waitqueue_head(&pf->reset_wait_queue); 3807 3808 /* setup service timer and periodic service task */ 3809 timer_setup(&pf->serv_tmr, ice_service_timer, 0); 3810 pf->serv_tmr_period = HZ; 3811 INIT_WORK(&pf->serv_task, ice_service_task); 3812 clear_bit(ICE_SERVICE_SCHED, pf->state); 3813 3814 mutex_init(&pf->avail_q_mutex); 3815 pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); 3816 if (!pf->avail_txqs) 3817 return -ENOMEM; 3818 3819 pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL); 3820 if (!pf->avail_rxqs) { 3821 devm_kfree(ice_pf_to_dev(pf), pf->avail_txqs); 3822 pf->avail_txqs = NULL; 3823 return -ENOMEM; 3824 } 3825 3826 mutex_init(&pf->vfs.table_lock); 3827 hash_init(pf->vfs.table); 3828 3829 return 0; 3830 } 3831 3832 /** 3833 * ice_ena_msix_range - Request a range of MSIX vectors from the OS 3834 * @pf: board private structure 3835 * 3836 * compute the number of MSIX vectors required (v_budget) and request from 3837 * the OS. Return the number of vectors reserved or negative on failure 3838 */ 3839 static int ice_ena_msix_range(struct ice_pf *pf) 3840 { 3841 int num_cpus, v_left, v_actual, v_other, v_budget = 0; 3842 struct device *dev = ice_pf_to_dev(pf); 3843 int needed, err, i; 3844 3845 v_left = pf->hw.func_caps.common_cap.num_msix_vectors; 3846 num_cpus = num_online_cpus(); 3847 3848 /* reserve for LAN miscellaneous handler */ 3849 needed = ICE_MIN_LAN_OICR_MSIX; 3850 if (v_left < needed) 3851 goto no_hw_vecs_left_err; 3852 v_budget += needed; 3853 v_left -= needed; 3854 3855 /* reserve for flow director */ 3856 if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) { 3857 needed = ICE_FDIR_MSIX; 3858 if (v_left < needed) 3859 goto no_hw_vecs_left_err; 3860 v_budget += needed; 3861 v_left -= needed; 3862 } 3863 3864 /* reserve for switchdev */ 3865 needed = ICE_ESWITCH_MSIX; 3866 if (v_left < needed) 3867 goto no_hw_vecs_left_err; 3868 v_budget += needed; 3869 v_left -= needed; 3870 3871 /* total used for non-traffic vectors */ 3872 v_other = v_budget; 3873 3874 /* reserve vectors for LAN traffic */ 3875 needed = num_cpus; 3876 if (v_left < needed) 3877 goto no_hw_vecs_left_err; 3878 pf->num_lan_msix = needed; 3879 v_budget += needed; 3880 v_left -= needed; 3881 3882 /* reserve vectors for RDMA auxiliary driver */ 3883 if (ice_is_rdma_ena(pf)) { 3884 needed = num_cpus + ICE_RDMA_NUM_AEQ_MSIX; 3885 if (v_left < needed) 3886 goto no_hw_vecs_left_err; 3887 pf->num_rdma_msix = needed; 3888 v_budget += needed; 3889 v_left -= needed; 3890 } 3891 3892 pf->msix_entries = devm_kcalloc(dev, v_budget, 3893 sizeof(*pf->msix_entries), GFP_KERNEL); 3894 if (!pf->msix_entries) { 3895 err = -ENOMEM; 3896 goto exit_err; 3897 } 3898 3899 for (i = 0; i < v_budget; i++) 3900 pf->msix_entries[i].entry = i; 3901 3902 /* actually reserve the vectors */ 3903 v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries, 3904 ICE_MIN_MSIX, v_budget); 3905 if (v_actual < 0) { 3906 dev_err(dev, "unable to reserve MSI-X vectors\n"); 3907 err = v_actual; 3908 goto msix_err; 3909 } 3910 3911 if (v_actual < v_budget) { 3912 dev_warn(dev, "not enough OS MSI-X vectors. 
requested = %d, obtained = %d\n", 3913 v_budget, v_actual); 3914 3915 if (v_actual < ICE_MIN_MSIX) { 3916 /* error if we can't get minimum vectors */ 3917 pci_disable_msix(pf->pdev); 3918 err = -ERANGE; 3919 goto msix_err; 3920 } else { 3921 int v_remain = v_actual - v_other; 3922 int v_rdma = 0, v_min_rdma = 0; 3923 3924 if (ice_is_rdma_ena(pf)) { 3925 /* Need at least 1 interrupt in addition to 3926 * AEQ MSIX 3927 */ 3928 v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1; 3929 v_min_rdma = ICE_MIN_RDMA_MSIX; 3930 } 3931 3932 if (v_actual == ICE_MIN_MSIX || 3933 v_remain < ICE_MIN_LAN_TXRX_MSIX + v_min_rdma) { 3934 dev_warn(dev, "Not enough MSI-X vectors to support RDMA.\n"); 3935 clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); 3936 3937 pf->num_rdma_msix = 0; 3938 pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; 3939 } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) || 3940 (v_remain - v_rdma < v_rdma)) { 3941 /* Support minimum RDMA and give remaining 3942 * vectors to LAN MSIX 3943 */ 3944 pf->num_rdma_msix = v_min_rdma; 3945 pf->num_lan_msix = v_remain - v_min_rdma; 3946 } else { 3947 /* Split remaining MSIX with RDMA after 3948 * accounting for AEQ MSIX 3949 */ 3950 pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 + 3951 ICE_RDMA_NUM_AEQ_MSIX; 3952 pf->num_lan_msix = v_remain - pf->num_rdma_msix; 3953 } 3954 3955 dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n", 3956 pf->num_lan_msix); 3957 3958 if (ice_is_rdma_ena(pf)) 3959 dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n", 3960 pf->num_rdma_msix); 3961 } 3962 } 3963 3964 return v_actual; 3965 3966 msix_err: 3967 devm_kfree(dev, pf->msix_entries); 3968 goto exit_err; 3969 3970 no_hw_vecs_left_err: 3971 dev_err(dev, "not enough device MSI-X vectors. requested = %d, available = %d\n", 3972 needed, v_left); 3973 err = -ERANGE; 3974 exit_err: 3975 pf->num_rdma_msix = 0; 3976 pf->num_lan_msix = 0; 3977 return err; 3978 } 3979 3980 /** 3981 * ice_dis_msix - Disable MSI-X interrupt setup in OS 3982 * @pf: board private structure 3983 */ 3984 static void ice_dis_msix(struct ice_pf *pf) 3985 { 3986 pci_disable_msix(pf->pdev); 3987 devm_kfree(ice_pf_to_dev(pf), pf->msix_entries); 3988 pf->msix_entries = NULL; 3989 } 3990 3991 /** 3992 * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme 3993 * @pf: board private structure 3994 */ 3995 static void ice_clear_interrupt_scheme(struct ice_pf *pf) 3996 { 3997 ice_dis_msix(pf); 3998 3999 if (pf->irq_tracker) { 4000 devm_kfree(ice_pf_to_dev(pf), pf->irq_tracker); 4001 pf->irq_tracker = NULL; 4002 } 4003 } 4004 4005 /** 4006 * ice_init_interrupt_scheme - Determine proper interrupt scheme 4007 * @pf: board private structure to initialize 4008 */ 4009 static int ice_init_interrupt_scheme(struct ice_pf *pf) 4010 { 4011 int vectors; 4012 4013 vectors = ice_ena_msix_range(pf); 4014 4015 if (vectors < 0) 4016 return vectors; 4017 4018 /* set up vector assignment tracking */ 4019 pf->irq_tracker = devm_kzalloc(ice_pf_to_dev(pf), 4020 struct_size(pf->irq_tracker, list, vectors), 4021 GFP_KERNEL); 4022 if (!pf->irq_tracker) { 4023 ice_dis_msix(pf); 4024 return -ENOMEM; 4025 } 4026 4027 /* populate SW interrupts pool with number of OS granted IRQs. 
*/ 4028 pf->num_avail_sw_msix = (u16)vectors; 4029 pf->irq_tracker->num_entries = (u16)vectors; 4030 pf->irq_tracker->end = pf->irq_tracker->num_entries; 4031 4032 return 0; 4033 } 4034 4035 /** 4036 * ice_is_wol_supported - check if WoL is supported 4037 * @hw: pointer to hardware info 4038 * 4039 * Check if WoL is supported based on the HW configuration. 4040 * Returns true if NVM supports and enables WoL for this port, false otherwise 4041 */ 4042 bool ice_is_wol_supported(struct ice_hw *hw) 4043 { 4044 u16 wol_ctrl; 4045 4046 /* A bit set to 1 in the NVM Software Reserved Word 2 (WoL control 4047 * word) indicates WoL is not supported on the corresponding PF ID. 4048 */ 4049 if (ice_read_sr_word(hw, ICE_SR_NVM_WOL_CFG, &wol_ctrl)) 4050 return false; 4051 4052 return !(BIT(hw->port_info->lport) & wol_ctrl); 4053 } 4054 4055 /** 4056 * ice_vsi_recfg_qs - Change the number of queues on a VSI 4057 * @vsi: VSI being changed 4058 * @new_rx: new number of Rx queues 4059 * @new_tx: new number of Tx queues 4060 * 4061 * Only change the number of queues if new_tx, or new_rx is non-0. 4062 * 4063 * Returns 0 on success. 4064 */ 4065 int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx) 4066 { 4067 struct ice_pf *pf = vsi->back; 4068 int err = 0, timeout = 50; 4069 4070 if (!new_rx && !new_tx) 4071 return -EINVAL; 4072 4073 while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) { 4074 timeout--; 4075 if (!timeout) 4076 return -EBUSY; 4077 usleep_range(1000, 2000); 4078 } 4079 4080 if (new_tx) 4081 vsi->req_txq = (u16)new_tx; 4082 if (new_rx) 4083 vsi->req_rxq = (u16)new_rx; 4084 4085 /* set for the next time the netdev is started */ 4086 if (!netif_running(vsi->netdev)) { 4087 ice_vsi_rebuild(vsi, false); 4088 dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n"); 4089 goto done; 4090 } 4091 4092 ice_vsi_close(vsi); 4093 ice_vsi_rebuild(vsi, false); 4094 ice_pf_dcb_recfg(pf); 4095 ice_vsi_open(vsi); 4096 done: 4097 clear_bit(ICE_CFG_BUSY, pf->state); 4098 return err; 4099 } 4100 4101 /** 4102 * ice_set_safe_mode_vlan_cfg - configure PF VSI to allow all VLANs in safe mode 4103 * @pf: PF to configure 4104 * 4105 * No VLAN offloads/filtering are advertised in safe mode so make sure the PF 4106 * VSI can still Tx/Rx VLAN tagged packets. 
4107 */ 4108 static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf) 4109 { 4110 struct ice_vsi *vsi = ice_get_main_vsi(pf); 4111 struct ice_vsi_ctx *ctxt; 4112 struct ice_hw *hw; 4113 int status; 4114 4115 if (!vsi) 4116 return; 4117 4118 ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); 4119 if (!ctxt) 4120 return; 4121 4122 hw = &pf->hw; 4123 ctxt->info = vsi->info; 4124 4125 ctxt->info.valid_sections = 4126 cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID | 4127 ICE_AQ_VSI_PROP_SECURITY_VALID | 4128 ICE_AQ_VSI_PROP_SW_VALID); 4129 4130 /* disable VLAN anti-spoof */ 4131 ctxt->info.sec_flags &= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << 4132 ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); 4133 4134 /* disable VLAN pruning and keep all other settings */ 4135 ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; 4136 4137 /* allow all VLANs on Tx and don't strip on Rx */ 4138 ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL | 4139 ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING; 4140 4141 status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); 4142 if (status) { 4143 dev_err(ice_pf_to_dev(vsi->back), "Failed to update VSI for safe mode VLANs, err %d aq_err %s\n", 4144 status, ice_aq_str(hw->adminq.sq_last_status)); 4145 } else { 4146 vsi->info.sec_flags = ctxt->info.sec_flags; 4147 vsi->info.sw_flags2 = ctxt->info.sw_flags2; 4148 vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags; 4149 } 4150 4151 kfree(ctxt); 4152 } 4153 4154 /** 4155 * ice_log_pkg_init - log result of DDP package load 4156 * @hw: pointer to hardware info 4157 * @state: state of package load 4158 */ 4159 static void ice_log_pkg_init(struct ice_hw *hw, enum ice_ddp_state state) 4160 { 4161 struct ice_pf *pf = hw->back; 4162 struct device *dev; 4163 4164 dev = ice_pf_to_dev(pf); 4165 4166 switch (state) { 4167 case ICE_DDP_PKG_SUCCESS: 4168 dev_info(dev, "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n", 4169 hw->active_pkg_name, 4170 hw->active_pkg_ver.major, 4171 hw->active_pkg_ver.minor, 4172 hw->active_pkg_ver.update, 4173 hw->active_pkg_ver.draft); 4174 break; 4175 case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED: 4176 dev_info(dev, "DDP package already present on device: %s version %d.%d.%d.%d\n", 4177 hw->active_pkg_name, 4178 hw->active_pkg_ver.major, 4179 hw->active_pkg_ver.minor, 4180 hw->active_pkg_ver.update, 4181 hw->active_pkg_ver.draft); 4182 break; 4183 case ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED: 4184 dev_err(dev, "The device has a DDP package that is not supported by the driver. The device has package '%s' version %d.%d.x.x. The driver requires version %d.%d.x.x. Entering Safe Mode.\n", 4185 hw->active_pkg_name, 4186 hw->active_pkg_ver.major, 4187 hw->active_pkg_ver.minor, 4188 ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); 4189 break; 4190 case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED: 4191 dev_info(dev, "The driver could not load the DDP package file because a compatible DDP package is already present on the device. The device has package '%s' version %d.%d.%d.%d. The package file found by the driver: '%s' version %d.%d.%d.%d.\n", 4192 hw->active_pkg_name, 4193 hw->active_pkg_ver.major, 4194 hw->active_pkg_ver.minor, 4195 hw->active_pkg_ver.update, 4196 hw->active_pkg_ver.draft, 4197 hw->pkg_name, 4198 hw->pkg_ver.major, 4199 hw->pkg_ver.minor, 4200 hw->pkg_ver.update, 4201 hw->pkg_ver.draft); 4202 break; 4203 case ICE_DDP_PKG_FW_MISMATCH: 4204 dev_err(dev, "The firmware loaded on the device is not compatible with the DDP package. Please update the device's NVM. 
Entering safe mode.\n"); 4205 break; 4206 case ICE_DDP_PKG_INVALID_FILE: 4207 dev_err(dev, "The DDP package file is invalid. Entering Safe Mode.\n"); 4208 break; 4209 case ICE_DDP_PKG_FILE_VERSION_TOO_HIGH: 4210 dev_err(dev, "The DDP package file version is higher than the driver supports. Please use an updated driver. Entering Safe Mode.\n"); 4211 break; 4212 case ICE_DDP_PKG_FILE_VERSION_TOO_LOW: 4213 dev_err(dev, "The DDP package file version is lower than the driver supports. The driver requires version %d.%d.x.x. Please use an updated DDP Package file. Entering Safe Mode.\n", 4214 ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); 4215 break; 4216 case ICE_DDP_PKG_FILE_SIGNATURE_INVALID: 4217 dev_err(dev, "The DDP package could not be loaded because its signature is not valid. Please use a valid DDP Package. Entering Safe Mode.\n"); 4218 break; 4219 case ICE_DDP_PKG_FILE_REVISION_TOO_LOW: 4220 dev_err(dev, "The DDP Package could not be loaded because its security revision is too low. Please use an updated DDP Package. Entering Safe Mode.\n"); 4221 break; 4222 case ICE_DDP_PKG_LOAD_ERROR: 4223 dev_err(dev, "An error occurred on the device while loading the DDP package. The device will be reset.\n"); 4224 /* poll for reset to complete */ 4225 if (ice_check_reset(hw)) 4226 dev_err(dev, "Error resetting device. Please reload the driver\n"); 4227 break; 4228 case ICE_DDP_PKG_ERR: 4229 default: 4230 dev_err(dev, "An unknown error occurred when loading the DDP package. Entering Safe Mode.\n"); 4231 break; 4232 } 4233 } 4234 4235 /** 4236 * ice_load_pkg - load/reload the DDP Package file 4237 * @firmware: firmware structure when firmware requested or NULL for reload 4238 * @pf: pointer to the PF instance 4239 * 4240 * Called on probe and post CORER/GLOBR rebuild to load DDP Package and 4241 * initialize HW tables. 4242 */ 4243 static void 4244 ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf) 4245 { 4246 enum ice_ddp_state state = ICE_DDP_PKG_ERR; 4247 struct device *dev = ice_pf_to_dev(pf); 4248 struct ice_hw *hw = &pf->hw; 4249 4250 /* Load DDP Package */ 4251 if (firmware && !hw->pkg_copy) { 4252 state = ice_copy_and_init_pkg(hw, firmware->data, 4253 firmware->size); 4254 ice_log_pkg_init(hw, state); 4255 } else if (!firmware && hw->pkg_copy) { 4256 /* Reload package during rebuild after CORER/GLOBR reset */ 4257 state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size); 4258 ice_log_pkg_init(hw, state); 4259 } else { 4260 dev_err(dev, "The DDP package file failed to load. Entering Safe Mode.\n"); 4261 } 4262 4263 if (!ice_is_init_pkg_successful(state)) { 4264 /* Safe Mode */ 4265 clear_bit(ICE_FLAG_ADV_FEATURES, pf->flags); 4266 return; 4267 } 4268 4269 /* Successful download package is the precondition for advanced 4270 * features, hence setting the ICE_FLAG_ADV_FEATURES flag 4271 */ 4272 set_bit(ICE_FLAG_ADV_FEATURES, pf->flags); 4273 } 4274 4275 /** 4276 * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines 4277 * @pf: pointer to the PF structure 4278 * 4279 * There is no error returned here because the driver should be able to handle 4280 * 128 Byte cache lines, so we only print a warning in case issues are seen, 4281 * specifically with Tx. 
4282 */ 4283 static void ice_verify_cacheline_size(struct ice_pf *pf) 4284 { 4285 if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M) 4286 dev_warn(ice_pf_to_dev(pf), "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n", 4287 ICE_CACHE_LINE_BYTES); 4288 } 4289 4290 /** 4291 * ice_send_version - update firmware with driver version 4292 * @pf: PF struct 4293 * 4294 * Returns 0 on success, else error code 4295 */ 4296 static int ice_send_version(struct ice_pf *pf) 4297 { 4298 struct ice_driver_ver dv; 4299 4300 dv.major_ver = 0xff; 4301 dv.minor_ver = 0xff; 4302 dv.build_ver = 0xff; 4303 dv.subbuild_ver = 0; 4304 strscpy((char *)dv.driver_string, UTS_RELEASE, 4305 sizeof(dv.driver_string)); 4306 return ice_aq_send_driver_ver(&pf->hw, &dv, NULL); 4307 } 4308 4309 /** 4310 * ice_init_fdir - Initialize flow director VSI and configuration 4311 * @pf: pointer to the PF instance 4312 * 4313 * Returns 0 on success, negative on error 4314 */ 4315 static int ice_init_fdir(struct ice_pf *pf) 4316 { 4317 struct device *dev = ice_pf_to_dev(pf); 4318 struct ice_vsi *ctrl_vsi; 4319 int err; 4320 4321 /* Side Band Flow Director needs to have a control VSI. 4322 * Allocate it and store it in the PF. 4323 */ 4324 ctrl_vsi = ice_ctrl_vsi_setup(pf, pf->hw.port_info); 4325 if (!ctrl_vsi) { 4326 dev_dbg(dev, "could not create control VSI\n"); 4327 return -ENOMEM; 4328 } 4329 4330 err = ice_vsi_open_ctrl(ctrl_vsi); 4331 if (err) { 4332 dev_dbg(dev, "could not open control VSI\n"); 4333 goto err_vsi_open; 4334 } 4335 4336 mutex_init(&pf->hw.fdir_fltr_lock); 4337 4338 err = ice_fdir_create_dflt_rules(pf); 4339 if (err) 4340 goto err_fdir_rule; 4341 4342 return 0; 4343 4344 err_fdir_rule: 4345 ice_fdir_release_flows(&pf->hw); 4346 ice_vsi_close(ctrl_vsi); 4347 err_vsi_open: 4348 ice_vsi_release(ctrl_vsi); 4349 if (pf->ctrl_vsi_idx != ICE_NO_VSI) { 4350 pf->vsi[pf->ctrl_vsi_idx] = NULL; 4351 pf->ctrl_vsi_idx = ICE_NO_VSI; 4352 } 4353 return err; 4354 } 4355 4356 /** 4357 * ice_get_opt_fw_name - return optional firmware file name or NULL 4358 * @pf: pointer to the PF instance 4359 */ 4360 static char *ice_get_opt_fw_name(struct ice_pf *pf) 4361 { 4362 /* Optional firmware name same as default with additional dash 4363 * followed by an EUI-64 identifier (PCIe Device Serial Number) 4364 */ 4365 struct pci_dev *pdev = pf->pdev; 4366 char *opt_fw_filename; 4367 u64 dsn; 4368 4369 /* Determine the name of the optional file using the DSN (two 4370 * dwords following the start of the DSN Capability). 4371 */ 4372 dsn = pci_get_dsn(pdev); 4373 if (!dsn) 4374 return NULL; 4375 4376 opt_fw_filename = kzalloc(NAME_MAX, GFP_KERNEL); 4377 if (!opt_fw_filename) 4378 return NULL; 4379 4380 snprintf(opt_fw_filename, NAME_MAX, "%sice-%016llx.pkg", 4381 ICE_DDP_PKG_PATH, dsn); 4382 4383 return opt_fw_filename; 4384 } 4385 4386 /** 4387 * ice_request_fw - request and load the DDP package file 4388 * @pf: pointer to the PF instance 4389 */ 4390 static void ice_request_fw(struct ice_pf *pf) 4391 { 4392 char *opt_fw_filename = ice_get_opt_fw_name(pf); 4393 const struct firmware *firmware = NULL; 4394 struct device *dev = ice_pf_to_dev(pf); 4395 int err = 0; 4396 4397 /* An optional device-specific DDP (if present) overrides the default DDP 4398 * package file. The kernel logs a debug message if the file doesn't exist, 4399 * and warning messages for other errors.
4400 */ 4401 if (opt_fw_filename) { 4402 err = firmware_request_nowarn(&firmware, opt_fw_filename, dev); 4403 if (err) { 4404 kfree(opt_fw_filename); 4405 goto dflt_pkg_load; 4406 } 4407 4408 /* request for firmware was successful. Download to device */ 4409 ice_load_pkg(firmware, pf); 4410 kfree(opt_fw_filename); 4411 release_firmware(firmware); 4412 return; 4413 } 4414 4415 dflt_pkg_load: 4416 err = request_firmware(&firmware, ICE_DDP_PKG_FILE, dev); 4417 if (err) { 4418 dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n"); 4419 return; 4420 } 4421 4422 /* request for firmware was successful. Download to device */ 4423 ice_load_pkg(firmware, pf); 4424 release_firmware(firmware); 4425 } 4426 4427 /** 4428 * ice_print_wake_reason - show the wake up cause in the log 4429 * @pf: pointer to the PF struct 4430 */ 4431 static void ice_print_wake_reason(struct ice_pf *pf) 4432 { 4433 u32 wus = pf->wakeup_reason; 4434 const char *wake_str; 4435 4436 /* if no wake event, nothing to print */ 4437 if (!wus) 4438 return; 4439 4440 if (wus & PFPM_WUS_LNKC_M) 4441 wake_str = "Link\n"; 4442 else if (wus & PFPM_WUS_MAG_M) 4443 wake_str = "Magic Packet\n"; 4444 else if (wus & PFPM_WUS_MNG_M) 4445 wake_str = "Management\n"; 4446 else if (wus & PFPM_WUS_FW_RST_WK_M) 4447 wake_str = "Firmware Reset\n"; 4448 else 4449 wake_str = "Unknown\n"; 4450 4451 dev_info(ice_pf_to_dev(pf), "Wake reason: %s", wake_str); 4452 } 4453 4454 /** 4455 * ice_register_netdev - register netdev and devlink port 4456 * @pf: pointer to the PF struct 4457 */ 4458 static int ice_register_netdev(struct ice_pf *pf) 4459 { 4460 struct ice_vsi *vsi; 4461 int err = 0; 4462 4463 vsi = ice_get_main_vsi(pf); 4464 if (!vsi || !vsi->netdev) 4465 return -EIO; 4466 4467 err = register_netdev(vsi->netdev); 4468 if (err) 4469 goto err_register_netdev; 4470 4471 set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); 4472 netif_carrier_off(vsi->netdev); 4473 netif_tx_stop_all_queues(vsi->netdev); 4474 err = ice_devlink_create_pf_port(pf); 4475 if (err) 4476 goto err_devlink_create; 4477 4478 devlink_port_type_eth_set(&pf->devlink_port, vsi->netdev); 4479 4480 return 0; 4481 err_devlink_create: 4482 unregister_netdev(vsi->netdev); 4483 clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); 4484 err_register_netdev: 4485 free_netdev(vsi->netdev); 4486 vsi->netdev = NULL; 4487 clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); 4488 return err; 4489 } 4490 4491 /** 4492 * ice_probe - Device initialization routine 4493 * @pdev: PCI device information struct 4494 * @ent: entry in ice_pci_tbl 4495 * 4496 * Returns 0 on success, negative on failure 4497 */ 4498 static int 4499 ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) 4500 { 4501 struct device *dev = &pdev->dev; 4502 struct ice_pf *pf; 4503 struct ice_hw *hw; 4504 int i, err; 4505 4506 if (pdev->is_virtfn) { 4507 dev_err(dev, "can't probe a virtual function\n"); 4508 return -EINVAL; 4509 } 4510 4511 /* this driver uses devres, see 4512 * Documentation/driver-api/driver-model/devres.rst 4513 */ 4514 err = pcim_enable_device(pdev); 4515 if (err) 4516 return err; 4517 4518 err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), dev_driver_string(dev)); 4519 if (err) { 4520 dev_err(dev, "BAR0 I/O map error %d\n", err); 4521 return err; 4522 } 4523 4524 pf = ice_allocate_pf(dev); 4525 if (!pf) 4526 return -ENOMEM; 4527 4528 /* initialize Auxiliary index to invalid value */ 4529 pf->aux_idx = -1; 4530 4531 /* set up for high or low DMA */ 4532 err = 
dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); 4533 if (err) { 4534 dev_err(dev, "DMA configuration failed: 0x%x\n", err); 4535 return err; 4536 } 4537 4538 pci_enable_pcie_error_reporting(pdev); 4539 pci_set_master(pdev); 4540 4541 pf->pdev = pdev; 4542 pci_set_drvdata(pdev, pf); 4543 set_bit(ICE_DOWN, pf->state); 4544 /* Disable service task until DOWN bit is cleared */ 4545 set_bit(ICE_SERVICE_DIS, pf->state); 4546 4547 hw = &pf->hw; 4548 hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0]; 4549 pci_save_state(pdev); 4550 4551 hw->back = pf; 4552 hw->vendor_id = pdev->vendor; 4553 hw->device_id = pdev->device; 4554 pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); 4555 hw->subsystem_vendor_id = pdev->subsystem_vendor; 4556 hw->subsystem_device_id = pdev->subsystem_device; 4557 hw->bus.device = PCI_SLOT(pdev->devfn); 4558 hw->bus.func = PCI_FUNC(pdev->devfn); 4559 ice_set_ctrlq_len(hw); 4560 4561 pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M); 4562 4563 #ifndef CONFIG_DYNAMIC_DEBUG 4564 if (debug < -1) 4565 hw->debug_mask = debug; 4566 #endif 4567 4568 err = ice_init_hw(hw); 4569 if (err) { 4570 dev_err(dev, "ice_init_hw failed: %d\n", err); 4571 err = -EIO; 4572 goto err_exit_unroll; 4573 } 4574 4575 ice_init_feature_support(pf); 4576 4577 ice_request_fw(pf); 4578 4579 /* if ice_request_fw fails, ICE_FLAG_ADV_FEATURES bit won't be 4580 * set in pf->state, which will cause ice_is_safe_mode to return 4581 * true 4582 */ 4583 if (ice_is_safe_mode(pf)) { 4584 /* we already got function/device capabilities but these don't 4585 * reflect what the driver needs to do in safe mode. Instead of 4586 * adding conditional logic everywhere to ignore these 4587 * device/function capabilities, override them. 4588 */ 4589 ice_set_safe_mode_caps(hw); 4590 } 4591 4592 err = ice_init_pf(pf); 4593 if (err) { 4594 dev_err(dev, "ice_init_pf failed: %d\n", err); 4595 goto err_init_pf_unroll; 4596 } 4597 4598 ice_devlink_init_regions(pf); 4599 4600 pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port; 4601 pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port; 4602 pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP; 4603 pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared; 4604 i = 0; 4605 if (pf->hw.tnl.valid_count[TNL_VXLAN]) { 4606 pf->hw.udp_tunnel_nic.tables[i].n_entries = 4607 pf->hw.tnl.valid_count[TNL_VXLAN]; 4608 pf->hw.udp_tunnel_nic.tables[i].tunnel_types = 4609 UDP_TUNNEL_TYPE_VXLAN; 4610 i++; 4611 } 4612 if (pf->hw.tnl.valid_count[TNL_GENEVE]) { 4613 pf->hw.udp_tunnel_nic.tables[i].n_entries = 4614 pf->hw.tnl.valid_count[TNL_GENEVE]; 4615 pf->hw.udp_tunnel_nic.tables[i].tunnel_types = 4616 UDP_TUNNEL_TYPE_GENEVE; 4617 i++; 4618 } 4619 4620 pf->num_alloc_vsi = hw->func_caps.guar_num_vsi; 4621 if (!pf->num_alloc_vsi) { 4622 err = -EIO; 4623 goto err_init_pf_unroll; 4624 } 4625 if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) { 4626 dev_warn(&pf->pdev->dev, 4627 "limiting the VSI count due to UDP tunnel limitation %d > %d\n", 4628 pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES); 4629 pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES; 4630 } 4631 4632 pf->vsi = devm_kcalloc(dev, pf->num_alloc_vsi, sizeof(*pf->vsi), 4633 GFP_KERNEL); 4634 if (!pf->vsi) { 4635 err = -ENOMEM; 4636 goto err_init_pf_unroll; 4637 } 4638 4639 err = ice_init_interrupt_scheme(pf); 4640 if (err) { 4641 dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err); 4642 err = -EIO; 4643 goto err_init_vsi_unroll; 4644 } 4645 4646 /* In case of MSIX we are going to 
setup the misc vector right here 4647 * to handle admin queue events etc. In case of legacy and MSI 4648 * the misc functionality and queue processing is combined in 4649 * the same vector and that gets setup at open. 4650 */ 4651 err = ice_req_irq_msix_misc(pf); 4652 if (err) { 4653 dev_err(dev, "setup of misc vector failed: %d\n", err); 4654 goto err_init_interrupt_unroll; 4655 } 4656 4657 /* create switch struct for the switch element created by FW on boot */ 4658 pf->first_sw = devm_kzalloc(dev, sizeof(*pf->first_sw), GFP_KERNEL); 4659 if (!pf->first_sw) { 4660 err = -ENOMEM; 4661 goto err_msix_misc_unroll; 4662 } 4663 4664 if (hw->evb_veb) 4665 pf->first_sw->bridge_mode = BRIDGE_MODE_VEB; 4666 else 4667 pf->first_sw->bridge_mode = BRIDGE_MODE_VEPA; 4668 4669 pf->first_sw->pf = pf; 4670 4671 /* record the sw_id available for later use */ 4672 pf->first_sw->sw_id = hw->port_info->sw_id; 4673 4674 err = ice_setup_pf_sw(pf); 4675 if (err) { 4676 dev_err(dev, "probe failed due to setup PF switch: %d\n", err); 4677 goto err_alloc_sw_unroll; 4678 } 4679 4680 clear_bit(ICE_SERVICE_DIS, pf->state); 4681 4682 /* tell the firmware we are up */ 4683 err = ice_send_version(pf); 4684 if (err) { 4685 dev_err(dev, "probe failed sending driver version %s. error: %d\n", 4686 UTS_RELEASE, err); 4687 goto err_send_version_unroll; 4688 } 4689 4690 /* since everything is good, start the service timer */ 4691 mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period)); 4692 4693 err = ice_init_link_events(pf->hw.port_info); 4694 if (err) { 4695 dev_err(dev, "ice_init_link_events failed: %d\n", err); 4696 goto err_send_version_unroll; 4697 } 4698 4699 /* not a fatal error if this fails */ 4700 err = ice_init_nvm_phy_type(pf->hw.port_info); 4701 if (err) 4702 dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err); 4703 4704 /* not a fatal error if this fails */ 4705 err = ice_update_link_info(pf->hw.port_info); 4706 if (err) 4707 dev_err(dev, "ice_update_link_info failed: %d\n", err); 4708 4709 ice_init_link_dflt_override(pf->hw.port_info); 4710 4711 ice_check_link_cfg_err(pf, 4712 pf->hw.port_info->phy.link_info.link_cfg_err); 4713 4714 /* if media available, initialize PHY settings */ 4715 if (pf->hw.port_info->phy.link_info.link_info & 4716 ICE_AQ_MEDIA_AVAILABLE) { 4717 /* not a fatal error if this fails */ 4718 err = ice_init_phy_user_cfg(pf->hw.port_info); 4719 if (err) 4720 dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err); 4721 4722 if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) { 4723 struct ice_vsi *vsi = ice_get_main_vsi(pf); 4724 4725 if (vsi) 4726 ice_configure_phy(vsi); 4727 } 4728 } else { 4729 set_bit(ICE_FLAG_NO_MEDIA, pf->flags); 4730 } 4731 4732 ice_verify_cacheline_size(pf); 4733 4734 /* Save wakeup reason register for later use */ 4735 pf->wakeup_reason = rd32(hw, PFPM_WUS); 4736 4737 /* check for a power management event */ 4738 ice_print_wake_reason(pf); 4739 4740 /* clear wake status, all bits */ 4741 wr32(hw, PFPM_WUS, U32_MAX); 4742 4743 /* Disable WoL at init, wait for user to enable */ 4744 device_set_wakeup_enable(dev, false); 4745 4746 if (ice_is_safe_mode(pf)) { 4747 ice_set_safe_mode_vlan_cfg(pf); 4748 goto probe_done; 4749 } 4750 4751 /* initialize DDP driven features */ 4752 if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) 4753 ice_ptp_init(pf); 4754 4755 if (ice_is_feature_supported(pf, ICE_F_GNSS)) 4756 ice_gnss_init(pf); 4757 4758 /* Note: Flow director init failure is non-fatal to load */ 4759 if (ice_init_fdir(pf)) 4760 dev_err(dev, "could 
not initialize flow director\n"); 4761 4762 /* Note: DCB init failure is non-fatal to load */ 4763 if (ice_init_pf_dcb(pf, false)) { 4764 clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); 4765 clear_bit(ICE_FLAG_DCB_ENA, pf->flags); 4766 } else { 4767 ice_cfg_lldp_mib_change(&pf->hw, true); 4768 } 4769 4770 if (ice_init_lag(pf)) 4771 dev_warn(dev, "Failed to init link aggregation support\n"); 4772 4773 /* print PCI link speed and width */ 4774 pcie_print_link_status(pf->pdev); 4775 4776 probe_done: 4777 err = ice_register_netdev(pf); 4778 if (err) 4779 goto err_netdev_reg; 4780 4781 err = ice_devlink_register_params(pf); 4782 if (err) 4783 goto err_netdev_reg; 4784 4785 /* ready to go, so clear down state bit */ 4786 clear_bit(ICE_DOWN, pf->state); 4787 if (ice_is_rdma_ena(pf)) { 4788 pf->aux_idx = ida_alloc(&ice_aux_ida, GFP_KERNEL); 4789 if (pf->aux_idx < 0) { 4790 dev_err(dev, "Failed to allocate device ID for AUX driver\n"); 4791 err = -ENOMEM; 4792 goto err_devlink_reg_param; 4793 } 4794 4795 err = ice_init_rdma(pf); 4796 if (err) { 4797 dev_err(dev, "Failed to initialize RDMA: %d\n", err); 4798 err = -EIO; 4799 goto err_init_aux_unroll; 4800 } 4801 } else { 4802 dev_warn(dev, "RDMA is not supported on this device\n"); 4803 } 4804 4805 ice_devlink_register(pf); 4806 return 0; 4807 4808 err_init_aux_unroll: 4809 pf->adev = NULL; 4810 ida_free(&ice_aux_ida, pf->aux_idx); 4811 err_devlink_reg_param: 4812 ice_devlink_unregister_params(pf); 4813 err_netdev_reg: 4814 err_send_version_unroll: 4815 ice_vsi_release_all(pf); 4816 err_alloc_sw_unroll: 4817 set_bit(ICE_SERVICE_DIS, pf->state); 4818 set_bit(ICE_DOWN, pf->state); 4819 devm_kfree(dev, pf->first_sw); 4820 err_msix_misc_unroll: 4821 ice_free_irq_msix_misc(pf); 4822 err_init_interrupt_unroll: 4823 ice_clear_interrupt_scheme(pf); 4824 err_init_vsi_unroll: 4825 devm_kfree(dev, pf->vsi); 4826 err_init_pf_unroll: 4827 ice_deinit_pf(pf); 4828 ice_devlink_destroy_regions(pf); 4829 ice_deinit_hw(hw); 4830 err_exit_unroll: 4831 pci_disable_pcie_error_reporting(pdev); 4832 pci_disable_device(pdev); 4833 return err; 4834 } 4835 4836 /** 4837 * ice_set_wake - enable or disable Wake on LAN 4838 * @pf: pointer to the PF struct 4839 * 4840 * Simple helper for WoL control 4841 */ 4842 static void ice_set_wake(struct ice_pf *pf) 4843 { 4844 struct ice_hw *hw = &pf->hw; 4845 bool wol = pf->wol_ena; 4846 4847 /* clear wake state, otherwise new wake events won't fire */ 4848 wr32(hw, PFPM_WUS, U32_MAX); 4849 4850 /* enable / disable APM wake up, no RMW needed */ 4851 wr32(hw, PFPM_APM, wol ? PFPM_APM_APME_M : 0); 4852 4853 /* set magic packet filter enabled */ 4854 wr32(hw, PFPM_WUFC, wol ? PFPM_WUFC_MAG_M : 0); 4855 } 4856 4857 /** 4858 * ice_setup_mc_magic_wake - setup device to wake on multicast magic packet 4859 * @pf: pointer to the PF struct 4860 * 4861 * Issue firmware command to enable multicast magic wake, making 4862 * sure that any locally administered address (LAA) is used for 4863 * wake, and that PF reset doesn't undo the LAA. 
4864 */ 4865 static void ice_setup_mc_magic_wake(struct ice_pf *pf) 4866 { 4867 struct device *dev = ice_pf_to_dev(pf); 4868 struct ice_hw *hw = &pf->hw; 4869 u8 mac_addr[ETH_ALEN]; 4870 struct ice_vsi *vsi; 4871 int status; 4872 u8 flags; 4873 4874 if (!pf->wol_ena) 4875 return; 4876 4877 vsi = ice_get_main_vsi(pf); 4878 if (!vsi) 4879 return; 4880 4881 /* Get current MAC address in case it's an LAA */ 4882 if (vsi->netdev) 4883 ether_addr_copy(mac_addr, vsi->netdev->dev_addr); 4884 else 4885 ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr); 4886 4887 flags = ICE_AQC_MAN_MAC_WR_MC_MAG_EN | 4888 ICE_AQC_MAN_MAC_UPDATE_LAA_WOL | 4889 ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP; 4890 4891 status = ice_aq_manage_mac_write(hw, mac_addr, flags, NULL); 4892 if (status) 4893 dev_err(dev, "Failed to enable Multicast Magic Packet wake, err %d aq_err %s\n", 4894 status, ice_aq_str(hw->adminq.sq_last_status)); 4895 } 4896 4897 /** 4898 * ice_remove - Device removal routine 4899 * @pdev: PCI device information struct 4900 */ 4901 static void ice_remove(struct pci_dev *pdev) 4902 { 4903 struct ice_pf *pf = pci_get_drvdata(pdev); 4904 int i; 4905 4906 ice_devlink_unregister(pf); 4907 for (i = 0; i < ICE_MAX_RESET_WAIT; i++) { 4908 if (!ice_is_reset_in_progress(pf->state)) 4909 break; 4910 msleep(100); 4911 } 4912 4913 ice_tc_indir_block_remove(pf); 4914 4915 if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) { 4916 set_bit(ICE_VF_RESETS_DISABLED, pf->state); 4917 ice_free_vfs(pf); 4918 } 4919 4920 ice_service_task_stop(pf); 4921 4922 ice_aq_cancel_waiting_tasks(pf); 4923 ice_unplug_aux_dev(pf); 4924 if (pf->aux_idx >= 0) 4925 ida_free(&ice_aux_ida, pf->aux_idx); 4926 ice_devlink_unregister_params(pf); 4927 set_bit(ICE_DOWN, pf->state); 4928 4929 ice_deinit_lag(pf); 4930 if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) 4931 ice_ptp_release(pf); 4932 if (ice_is_feature_supported(pf, ICE_F_GNSS)) 4933 ice_gnss_exit(pf); 4934 if (!ice_is_safe_mode(pf)) 4935 ice_remove_arfs(pf); 4936 ice_setup_mc_magic_wake(pf); 4937 ice_vsi_release_all(pf); 4938 mutex_destroy(&(&pf->hw)->fdir_fltr_lock); 4939 ice_set_wake(pf); 4940 ice_free_irq_msix_misc(pf); 4941 ice_for_each_vsi(pf, i) { 4942 if (!pf->vsi[i]) 4943 continue; 4944 ice_vsi_free_q_vectors(pf->vsi[i]); 4945 } 4946 ice_deinit_pf(pf); 4947 ice_devlink_destroy_regions(pf); 4948 ice_deinit_hw(&pf->hw); 4949 4950 /* Issue a PFR as part of the prescribed driver unload flow. Do not 4951 * do it via ice_schedule_reset() since there is no need to rebuild 4952 * and the service task is already stopped. 
4953 */ 4954 ice_reset(&pf->hw, ICE_RESET_PFR); 4955 pci_wait_for_pending_transaction(pdev); 4956 ice_clear_interrupt_scheme(pf); 4957 pci_disable_pcie_error_reporting(pdev); 4958 pci_disable_device(pdev); 4959 } 4960 4961 /** 4962 * ice_shutdown - PCI callback for shutting down device 4963 * @pdev: PCI device information struct 4964 */ 4965 static void ice_shutdown(struct pci_dev *pdev) 4966 { 4967 struct ice_pf *pf = pci_get_drvdata(pdev); 4968 4969 ice_remove(pdev); 4970 4971 if (system_state == SYSTEM_POWER_OFF) { 4972 pci_wake_from_d3(pdev, pf->wol_ena); 4973 pci_set_power_state(pdev, PCI_D3hot); 4974 } 4975 } 4976 4977 #ifdef CONFIG_PM 4978 /** 4979 * ice_prepare_for_shutdown - prep for PCI shutdown 4980 * @pf: board private structure 4981 * 4982 * Inform or close all dependent features in prep for PCI device shutdown 4983 */ 4984 static void ice_prepare_for_shutdown(struct ice_pf *pf) 4985 { 4986 struct ice_hw *hw = &pf->hw; 4987 u32 v; 4988 4989 /* Notify VFs of impending reset */ 4990 if (ice_check_sq_alive(hw, &hw->mailboxq)) 4991 ice_vc_notify_reset(pf); 4992 4993 dev_dbg(ice_pf_to_dev(pf), "Tearing down internal switch for shutdown\n"); 4994 4995 /* disable the VSIs and their queues that are not already DOWN */ 4996 ice_pf_dis_all_vsi(pf, false); 4997 4998 ice_for_each_vsi(pf, v) 4999 if (pf->vsi[v]) 5000 pf->vsi[v]->vsi_num = 0; 5001 5002 ice_shutdown_all_ctrlq(hw); 5003 } 5004 5005 /** 5006 * ice_reinit_interrupt_scheme - Reinitialize interrupt scheme 5007 * @pf: board private structure to reinitialize 5008 * 5009 * This routine reinitializes the interrupt scheme that was cleared during 5010 * the power management suspend callback. 5011 * 5012 * This should be called during the resume routine to re-allocate the q_vectors 5013 * and reacquire interrupts. 5014 */ 5015 static int ice_reinit_interrupt_scheme(struct ice_pf *pf) 5016 { 5017 struct device *dev = ice_pf_to_dev(pf); 5018 int ret, v; 5019 5020 /* Since we clear MSIX flag during suspend, we need to 5021 * set it back during resume... 5022 */ 5023 5024 ret = ice_init_interrupt_scheme(pf); 5025 if (ret) { 5026 dev_err(dev, "Failed to re-initialize interrupt %d\n", ret); 5027 return ret; 5028 } 5029 5030 /* Remap vectors and rings, after successful re-init interrupts */ 5031 ice_for_each_vsi(pf, v) { 5032 if (!pf->vsi[v]) 5033 continue; 5034 5035 ret = ice_vsi_alloc_q_vectors(pf->vsi[v]); 5036 if (ret) 5037 goto err_reinit; 5038 ice_vsi_map_rings_to_vectors(pf->vsi[v]); 5039 } 5040 5041 ret = ice_req_irq_msix_misc(pf); 5042 if (ret) { 5043 dev_err(dev, "Setting up misc vector failed after device suspend %d\n", 5044 ret); 5045 goto err_reinit; 5046 } 5047 5048 return 0; 5049 5050 err_reinit: 5051 while (v--) 5052 if (pf->vsi[v]) 5053 ice_vsi_free_q_vectors(pf->vsi[v]); 5054 5055 return ret; 5056 } 5057 5058 /** 5059 * ice_suspend 5060 * @dev: generic device information structure 5061 * 5062 * Power Management callback to quiesce the device and prepare 5063 * for D3 transition. 5064 */ 5065 static int __maybe_unused ice_suspend(struct device *dev) 5066 { 5067 struct pci_dev *pdev = to_pci_dev(dev); 5068 struct ice_pf *pf; 5069 int disabled, v; 5070 5071 pf = pci_get_drvdata(pdev); 5072 5073 if (!ice_pf_state_is_nominal(pf)) { 5074 dev_err(dev, "Device is not ready, no need to suspend it\n"); 5075 return -EBUSY; 5076 } 5077 5078 /* Stop watchdog tasks until resume completion.
5079 * Even though it is most likely that the service task is 5080 * disabled if the device is suspended or down, the service task's 5081 * state is controlled by a different state bit, and we should 5082 * store and honor whatever state that bit is in at this point. 5083 */ 5084 disabled = ice_service_task_stop(pf); 5085 5086 ice_unplug_aux_dev(pf); 5087 5088 /* Already suspended?, then there is nothing to do */ 5089 if (test_and_set_bit(ICE_SUSPENDED, pf->state)) { 5090 if (!disabled) 5091 ice_service_task_restart(pf); 5092 return 0; 5093 } 5094 5095 if (test_bit(ICE_DOWN, pf->state) || 5096 ice_is_reset_in_progress(pf->state)) { 5097 dev_err(dev, "can't suspend device in reset or already down\n"); 5098 if (!disabled) 5099 ice_service_task_restart(pf); 5100 return 0; 5101 } 5102 5103 ice_setup_mc_magic_wake(pf); 5104 5105 ice_prepare_for_shutdown(pf); 5106 5107 ice_set_wake(pf); 5108 5109 /* Free vectors, clear the interrupt scheme and release IRQs 5110 * for proper hibernation, especially with large number of CPUs. 5111 * Otherwise hibernation might fail when mapping all the vectors back 5112 * to CPU0. 5113 */ 5114 ice_free_irq_msix_misc(pf); 5115 ice_for_each_vsi(pf, v) { 5116 if (!pf->vsi[v]) 5117 continue; 5118 ice_vsi_free_q_vectors(pf->vsi[v]); 5119 } 5120 ice_free_cpu_rx_rmap(ice_get_main_vsi(pf)); 5121 ice_clear_interrupt_scheme(pf); 5122 5123 pci_save_state(pdev); 5124 pci_wake_from_d3(pdev, pf->wol_ena); 5125 pci_set_power_state(pdev, PCI_D3hot); 5126 return 0; 5127 } 5128 5129 /** 5130 * ice_resume - PM callback for waking up from D3 5131 * @dev: generic device information structure 5132 */ 5133 static int __maybe_unused ice_resume(struct device *dev) 5134 { 5135 struct pci_dev *pdev = to_pci_dev(dev); 5136 enum ice_reset_req reset_type; 5137 struct ice_pf *pf; 5138 struct ice_hw *hw; 5139 int ret; 5140 5141 pci_set_power_state(pdev, PCI_D0); 5142 pci_restore_state(pdev); 5143 pci_save_state(pdev); 5144 5145 if (!pci_device_is_present(pdev)) 5146 return -ENODEV; 5147 5148 ret = pci_enable_device_mem(pdev); 5149 if (ret) { 5150 dev_err(dev, "Cannot enable device after suspend\n"); 5151 return ret; 5152 } 5153 5154 pf = pci_get_drvdata(pdev); 5155 hw = &pf->hw; 5156 5157 pf->wakeup_reason = rd32(hw, PFPM_WUS); 5158 ice_print_wake_reason(pf); 5159 5160 /* We cleared the interrupt scheme when we suspended, so we need to 5161 * restore it now to resume device functionality. 5162 */ 5163 ret = ice_reinit_interrupt_scheme(pf); 5164 if (ret) 5165 dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret); 5166 5167 clear_bit(ICE_DOWN, pf->state); 5168 /* Now perform PF reset and rebuild */ 5169 reset_type = ICE_RESET_PFR; 5170 /* re-enable service task for reset, but allow reset to schedule it */ 5171 clear_bit(ICE_SERVICE_DIS, pf->state); 5172 5173 if (ice_schedule_reset(pf, reset_type)) 5174 dev_err(dev, "Reset during resume failed.\n"); 5175 5176 clear_bit(ICE_SUSPENDED, pf->state); 5177 ice_service_task_restart(pf); 5178 5179 /* Restart the service task */ 5180 mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period)); 5181 5182 return 0; 5183 } 5184 #endif /* CONFIG_PM */ 5185 5186 /** 5187 * ice_pci_err_detected - warning that PCI error has been detected 5188 * @pdev: PCI device information struct 5189 * @err: the type of PCI error 5190 * 5191 * Called to warn that something happened on the PCI bus and the error handling 5192 * is in progress. Allows the driver to gracefully prepare/handle PCI errors. 
5193 */ 5194 static pci_ers_result_t 5195 ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err) 5196 { 5197 struct ice_pf *pf = pci_get_drvdata(pdev); 5198 5199 if (!pf) { 5200 dev_err(&pdev->dev, "%s: unrecoverable device error %d\n", 5201 __func__, err); 5202 return PCI_ERS_RESULT_DISCONNECT; 5203 } 5204 5205 if (!test_bit(ICE_SUSPENDED, pf->state)) { 5206 ice_service_task_stop(pf); 5207 5208 if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) { 5209 set_bit(ICE_PFR_REQ, pf->state); 5210 ice_prepare_for_reset(pf, ICE_RESET_PFR); 5211 } 5212 } 5213 5214 return PCI_ERS_RESULT_NEED_RESET; 5215 } 5216 5217 /** 5218 * ice_pci_err_slot_reset - a PCI slot reset has just happened 5219 * @pdev: PCI device information struct 5220 * 5221 * Called to determine if the driver can recover from the PCI slot reset by 5222 * using a register read to determine if the device is recoverable. 5223 */ 5224 static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev) 5225 { 5226 struct ice_pf *pf = pci_get_drvdata(pdev); 5227 pci_ers_result_t result; 5228 int err; 5229 u32 reg; 5230 5231 err = pci_enable_device_mem(pdev); 5232 if (err) { 5233 dev_err(&pdev->dev, "Cannot re-enable PCI device after reset, error %d\n", 5234 err); 5235 result = PCI_ERS_RESULT_DISCONNECT; 5236 } else { 5237 pci_set_master(pdev); 5238 pci_restore_state(pdev); 5239 pci_save_state(pdev); 5240 pci_wake_from_d3(pdev, false); 5241 5242 /* Check for life */ 5243 reg = rd32(&pf->hw, GLGEN_RTRIG); 5244 if (!reg) 5245 result = PCI_ERS_RESULT_RECOVERED; 5246 else 5247 result = PCI_ERS_RESULT_DISCONNECT; 5248 } 5249 5250 err = pci_aer_clear_nonfatal_status(pdev); 5251 if (err) 5252 dev_dbg(&pdev->dev, "pci_aer_clear_nonfatal_status() failed, error %d\n", 5253 err); 5254 /* non-fatal, continue */ 5255 5256 return result; 5257 } 5258 5259 /** 5260 * ice_pci_err_resume - restart operations after PCI error recovery 5261 * @pdev: PCI device information struct 5262 * 5263 * Called to allow the driver to bring things back up after PCI error and/or 5264 * reset recovery have finished 5265 */ 5266 static void ice_pci_err_resume(struct pci_dev *pdev) 5267 { 5268 struct ice_pf *pf = pci_get_drvdata(pdev); 5269 5270 if (!pf) { 5271 dev_err(&pdev->dev, "%s failed, device is unrecoverable\n", 5272 __func__); 5273 return; 5274 } 5275 5276 if (test_bit(ICE_SUSPENDED, pf->state)) { 5277 dev_dbg(&pdev->dev, "%s failed to resume normal operations!\n", 5278 __func__); 5279 return; 5280 } 5281 5282 ice_restore_all_vfs_msi_state(pdev); 5283 5284 ice_do_reset(pf, ICE_RESET_PFR); 5285 ice_service_task_restart(pf); 5286 mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period)); 5287 } 5288 5289 /** 5290 * ice_pci_err_reset_prepare - prepare device driver for PCI reset 5291 * @pdev: PCI device information struct 5292 */ 5293 static void ice_pci_err_reset_prepare(struct pci_dev *pdev) 5294 { 5295 struct ice_pf *pf = pci_get_drvdata(pdev); 5296 5297 if (!test_bit(ICE_SUSPENDED, pf->state)) { 5298 ice_service_task_stop(pf); 5299 5300 if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) { 5301 set_bit(ICE_PFR_REQ, pf->state); 5302 ice_prepare_for_reset(pf, ICE_RESET_PFR); 5303 } 5304 } 5305 } 5306 5307 /** 5308 * ice_pci_err_reset_done - PCI reset done, device driver reset can begin 5309 * @pdev: PCI device information struct 5310 */ 5311 static void ice_pci_err_reset_done(struct pci_dev *pdev) 5312 { 5313 ice_pci_err_resume(pdev); 5314 } 5315 5316 /* ice_pci_tbl - PCI Device ID Table 5317 * 5318 * Wildcard entries (PCI_ANY_ID) should come last 
5319 * Last entry must be all 0s 5320 * 5321 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, 5322 * Class, Class Mask, private data (not used) } 5323 */ 5324 static const struct pci_device_id ice_pci_tbl[] = { 5325 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 }, 5326 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 }, 5327 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 }, 5328 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_BACKPLANE), 0 }, 5329 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_QSFP), 0 }, 5330 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_SFP), 0 }, 5331 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_BACKPLANE), 0 }, 5332 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_QSFP), 0 }, 5333 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SFP), 0 }, 5334 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_10G_BASE_T), 0 }, 5335 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SGMII), 0 }, 5336 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_BACKPLANE), 0 }, 5337 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_QSFP), 0 }, 5338 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SFP), 0 }, 5339 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_10G_BASE_T), 0 }, 5340 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SGMII), 0 }, 5341 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_BACKPLANE), 0 }, 5342 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SFP), 0 }, 5343 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_10G_BASE_T), 0 }, 5344 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SGMII), 0 }, 5345 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_BACKPLANE), 0 }, 5346 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_SFP), 0 }, 5347 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_10G_BASE_T), 0 }, 5348 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE), 0 }, 5349 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP), 0 }, 5350 /* required last entry */ 5351 { 0, } 5352 }; 5353 MODULE_DEVICE_TABLE(pci, ice_pci_tbl); 5354 5355 static __maybe_unused SIMPLE_DEV_PM_OPS(ice_pm_ops, ice_suspend, ice_resume); 5356 5357 static const struct pci_error_handlers ice_pci_err_handler = { 5358 .error_detected = ice_pci_err_detected, 5359 .slot_reset = ice_pci_err_slot_reset, 5360 .reset_prepare = ice_pci_err_reset_prepare, 5361 .reset_done = ice_pci_err_reset_done, 5362 .resume = ice_pci_err_resume 5363 }; 5364 5365 static struct pci_driver ice_driver = { 5366 .name = KBUILD_MODNAME, 5367 .id_table = ice_pci_tbl, 5368 .probe = ice_probe, 5369 .remove = ice_remove, 5370 #ifdef CONFIG_PM 5371 .driver.pm = &ice_pm_ops, 5372 #endif /* CONFIG_PM */ 5373 .shutdown = ice_shutdown, 5374 .sriov_configure = ice_sriov_configure, 5375 .err_handler = &ice_pci_err_handler 5376 }; 5377 5378 /** 5379 * ice_module_init - Driver registration routine 5380 * 5381 * ice_module_init is the first routine called when the driver is 5382 * loaded. All it does is register with the PCI subsystem. 5383 */ 5384 static int __init ice_module_init(void) 5385 { 5386 int status; 5387 5388 pr_info("%s\n", ice_driver_string); 5389 pr_info("%s\n", ice_copyright); 5390 5391 ice_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, KBUILD_MODNAME); 5392 if (!ice_wq) { 5393 pr_err("Failed to create workqueue\n"); 5394 return -ENOMEM; 5395 } 5396 5397 status = pci_register_driver(&ice_driver); 5398 if (status) { 5399 pr_err("failed to register PCI driver, err %d\n", status); 5400 destroy_workqueue(ice_wq); 5401 } 5402 5403 return status; 5404 } 5405 module_init(ice_module_init); 5406 5407 /** 5408 * ice_module_exit - Driver exit cleanup routine 5409 * 5410 * ice_module_exit is called just before the driver is removed 5411 * from memory. 
5412 */ 5413 static void __exit ice_module_exit(void) 5414 { 5415 pci_unregister_driver(&ice_driver); 5416 destroy_workqueue(ice_wq); 5417 pr_info("module unloaded\n"); 5418 } 5419 module_exit(ice_module_exit); 5420 5421 /** 5422 * ice_set_mac_address - NDO callback to set MAC address 5423 * @netdev: network interface device structure 5424 * @pi: pointer to an address structure 5425 * 5426 * Returns 0 on success, negative on failure 5427 */ 5428 static int ice_set_mac_address(struct net_device *netdev, void *pi) 5429 { 5430 struct ice_netdev_priv *np = netdev_priv(netdev); 5431 struct ice_vsi *vsi = np->vsi; 5432 struct ice_pf *pf = vsi->back; 5433 struct ice_hw *hw = &pf->hw; 5434 struct sockaddr *addr = pi; 5435 u8 old_mac[ETH_ALEN]; 5436 u8 flags = 0; 5437 u8 *mac; 5438 int err; 5439 5440 mac = (u8 *)addr->sa_data; 5441 5442 if (!is_valid_ether_addr(mac)) 5443 return -EADDRNOTAVAIL; 5444 5445 if (ether_addr_equal(netdev->dev_addr, mac)) { 5446 netdev_dbg(netdev, "already using mac %pM\n", mac); 5447 return 0; 5448 } 5449 5450 if (test_bit(ICE_DOWN, pf->state) || 5451 ice_is_reset_in_progress(pf->state)) { 5452 netdev_err(netdev, "can't set mac %pM. device not ready\n", 5453 mac); 5454 return -EBUSY; 5455 } 5456 5457 if (ice_chnl_dmac_fltr_cnt(pf)) { 5458 netdev_err(netdev, "can't set mac %pM. Device has tc-flower filters, delete all of them and try again\n", 5459 mac); 5460 return -EAGAIN; 5461 } 5462 5463 netif_addr_lock_bh(netdev); 5464 ether_addr_copy(old_mac, netdev->dev_addr); 5465 /* change the netdev's MAC address */ 5466 eth_hw_addr_set(netdev, mac); 5467 netif_addr_unlock_bh(netdev); 5468 5469 /* Clean up old MAC filter. Not an error if old filter doesn't exist */ 5470 err = ice_fltr_remove_mac(vsi, old_mac, ICE_FWD_TO_VSI); 5471 if (err && err != -ENOENT) { 5472 err = -EADDRNOTAVAIL; 5473 goto err_update_filters; 5474 } 5475 5476 /* Add filter for new MAC. If filter exists, return success */ 5477 err = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI); 5478 if (err == -EEXIST) 5479 /* Although this MAC filter is already present in hardware it's 5480 * possible in some cases (e.g. bonding) that dev_addr was 5481 * modified outside of the driver and needs to be restored back 5482 * to this value. 5483 */ 5484 netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac); 5485 else if (err) 5486 /* error if the new filter addition failed */ 5487 err = -EADDRNOTAVAIL; 5488 5489 err_update_filters: 5490 if (err) { 5491 netdev_err(netdev, "can't set MAC %pM. filter update failed\n", 5492 mac); 5493 netif_addr_lock_bh(netdev); 5494 eth_hw_addr_set(netdev, old_mac); 5495 netif_addr_unlock_bh(netdev); 5496 return err; 5497 } 5498 5499 netdev_dbg(vsi->netdev, "updated MAC address to %pM\n", 5500 netdev->dev_addr); 5501 5502 /* write new MAC address to the firmware */ 5503 flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL; 5504 err = ice_aq_manage_mac_write(hw, mac, flags, NULL); 5505 if (err) { 5506 netdev_err(netdev, "can't set MAC %pM. 
write to firmware failed error %d\n", 5507 mac, err); 5508 } 5509 return 0; 5510 } 5511 5512 /** 5513 * ice_set_rx_mode - NDO callback to set the netdev filters 5514 * @netdev: network interface device structure 5515 */ 5516 static void ice_set_rx_mode(struct net_device *netdev) 5517 { 5518 struct ice_netdev_priv *np = netdev_priv(netdev); 5519 struct ice_vsi *vsi = np->vsi; 5520 5521 if (!vsi) 5522 return; 5523 5524 /* Set the flags to synchronize filters 5525 * ndo_set_rx_mode may be triggered even without a change in netdev 5526 * flags 5527 */ 5528 set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state); 5529 set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); 5530 set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags); 5531 5532 /* schedule our worker thread which will take care of 5533 * applying the new filter changes 5534 */ 5535 ice_service_task_schedule(vsi->back); 5536 } 5537 5538 /** 5539 * ice_set_tx_maxrate - NDO callback to set the maximum per-queue bitrate 5540 * @netdev: network interface device structure 5541 * @queue_index: Queue ID 5542 * @maxrate: maximum bandwidth in Mbps 5543 */ 5544 static int 5545 ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate) 5546 { 5547 struct ice_netdev_priv *np = netdev_priv(netdev); 5548 struct ice_vsi *vsi = np->vsi; 5549 u16 q_handle; 5550 int status; 5551 u8 tc; 5552 5553 /* Validate maxrate requested is within permitted range */ 5554 if (maxrate && (maxrate > (ICE_SCHED_MAX_BW / 1000))) { 5555 netdev_err(netdev, "Invalid max rate %d specified for the queue %d\n", 5556 maxrate, queue_index); 5557 return -EINVAL; 5558 } 5559 5560 q_handle = vsi->tx_rings[queue_index]->q_handle; 5561 tc = ice_dcb_get_tc(vsi, queue_index); 5562 5563 /* Set BW back to default, when user set maxrate to 0 */ 5564 if (!maxrate) 5565 status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc, 5566 q_handle, ICE_MAX_BW); 5567 else 5568 status = ice_cfg_q_bw_lmt(vsi->port_info, vsi->idx, tc, 5569 q_handle, ICE_MAX_BW, maxrate * 1000); 5570 if (status) 5571 netdev_err(netdev, "Unable to set Tx max rate, error %d\n", 5572 status); 5573 5574 return status; 5575 } 5576 5577 /** 5578 * ice_fdb_add - add an entry to the hardware database 5579 * @ndm: the input from the stack 5580 * @tb: pointer to array of nladdr (unused) 5581 * @dev: the net device pointer 5582 * @addr: the MAC address entry being added 5583 * @vid: VLAN ID 5584 * @flags: instructions from stack about fdb operation 5585 * @extack: netlink extended ack 5586 */ 5587 static int 5588 ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[], 5589 struct net_device *dev, const unsigned char *addr, u16 vid, 5590 u16 flags, struct netlink_ext_ack __always_unused *extack) 5591 { 5592 int err; 5593 5594 if (vid) { 5595 netdev_err(dev, "VLANs aren't supported yet for dev_uc|mc_add()\n"); 5596 return -EINVAL; 5597 } 5598 if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { 5599 netdev_err(dev, "FDB only supports static addresses\n"); 5600 return -EINVAL; 5601 } 5602 5603 if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) 5604 err = dev_uc_add_excl(dev, addr); 5605 else if (is_multicast_ether_addr(addr)) 5606 err = dev_mc_add_excl(dev, addr); 5607 else 5608 err = -EINVAL; 5609 5610 /* Only return duplicate errors if NLM_F_EXCL is set */ 5611 if (err == -EEXIST && !(flags & NLM_F_EXCL)) 5612 err = 0; 5613 5614 return err; 5615 } 5616 5617 /** 5618 * ice_fdb_del - delete an entry from the hardware database 5619 * @ndm: the input from the stack 5620 * @tb: pointer to array of 
nladdr (unused) 5621 * @dev: the net device pointer 5622 * @addr: the MAC address entry being removed 5623 * @vid: VLAN ID 5624 */ 5625 static int 5626 ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[], 5627 struct net_device *dev, const unsigned char *addr, 5628 __always_unused u16 vid) 5629 { 5630 int err; 5631 5632 if (ndm->ndm_state & NUD_PERMANENT) { 5633 netdev_err(dev, "FDB only supports static addresses\n"); 5634 return -EINVAL; 5635 } 5636 5637 if (is_unicast_ether_addr(addr)) 5638 err = dev_uc_del(dev, addr); 5639 else if (is_multicast_ether_addr(addr)) 5640 err = dev_mc_del(dev, addr); 5641 else 5642 err = -EINVAL; 5643 5644 return err; 5645 } 5646 5647 #define NETIF_VLAN_OFFLOAD_FEATURES (NETIF_F_HW_VLAN_CTAG_RX | \ 5648 NETIF_F_HW_VLAN_CTAG_TX | \ 5649 NETIF_F_HW_VLAN_STAG_RX | \ 5650 NETIF_F_HW_VLAN_STAG_TX) 5651 5652 #define NETIF_VLAN_FILTERING_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \ 5653 NETIF_F_HW_VLAN_STAG_FILTER) 5654 5655 /** 5656 * ice_fix_features - fix the netdev features flags based on device limitations 5657 * @netdev: ptr to the netdev that flags are being fixed on 5658 * @features: features that need to be checked and possibly fixed 5659 * 5660 * Make sure any fixups are made to features in this callback. This enables the 5661 * driver to not have to check unsupported configurations throughout the driver 5662 * because that's the responsibility of this callback. 5663 * 5664 * Single VLAN Mode (SVM) Supported Features: 5665 * NETIF_F_HW_VLAN_CTAG_FILTER 5666 * NETIF_F_HW_VLAN_CTAG_RX 5667 * NETIF_F_HW_VLAN_CTAG_TX 5668 * 5669 * Double VLAN Mode (DVM) Supported Features: 5670 * NETIF_F_HW_VLAN_CTAG_FILTER 5671 * NETIF_F_HW_VLAN_CTAG_RX 5672 * NETIF_F_HW_VLAN_CTAG_TX 5673 * 5674 * NETIF_F_HW_VLAN_STAG_FILTER 5675 * NETIF_F_HW_VLAN_STAG_RX 5676 * NETIF_F_HW_VLAN_STAG_TX 5677 * 5678 * Features that need fixing: 5679 * Cannot simultaneously enable CTAG and STAG stripping and/or insertion. 5680 * These are mutually exclusive as the VSI context cannot support multiple 5681 * VLAN ethertypes simultaneously for stripping and/or insertion. If this 5682 * is not done, then default to clearing the requested STAG offload 5683 * settings. 5684 * 5685 * All supported filtering has to be enabled or disabled together. For 5686 * example, in DVM, CTAG and STAG filtering have to be enabled and disabled 5687 * together. If this is not done, then default to VLAN filtering disabled. 5688 * These are mutually exclusive as there is currently no way to 5689 * enable/disable VLAN filtering based on VLAN ethertype when using VLAN 5690 * prune rules.
5691 */ 5692 static netdev_features_t 5693 ice_fix_features(struct net_device *netdev, netdev_features_t features) 5694 { 5695 struct ice_netdev_priv *np = netdev_priv(netdev); 5696 netdev_features_t supported_vlan_filtering; 5697 netdev_features_t requested_vlan_filtering; 5698 struct ice_vsi *vsi = np->vsi; 5699 5700 requested_vlan_filtering = features & NETIF_VLAN_FILTERING_FEATURES; 5701 5702 /* make sure supported_vlan_filtering works for both SVM and DVM */ 5703 supported_vlan_filtering = NETIF_F_HW_VLAN_CTAG_FILTER; 5704 if (ice_is_dvm_ena(&vsi->back->hw)) 5705 supported_vlan_filtering |= NETIF_F_HW_VLAN_STAG_FILTER; 5706 5707 if (requested_vlan_filtering && 5708 requested_vlan_filtering != supported_vlan_filtering) { 5709 if (requested_vlan_filtering & NETIF_F_HW_VLAN_CTAG_FILTER) { 5710 netdev_warn(netdev, "cannot support requested VLAN filtering settings, enabling all supported VLAN filtering settings\n"); 5711 features |= supported_vlan_filtering; 5712 } else { 5713 netdev_warn(netdev, "cannot support requested VLAN filtering settings, clearing all supported VLAN filtering settings\n"); 5714 features &= ~supported_vlan_filtering; 5715 } 5716 } 5717 5718 if ((features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX)) && 5719 (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX))) { 5720 netdev_warn(netdev, "cannot support CTAG and STAG VLAN stripping and/or insertion simultaneously since CTAG and STAG offloads are mutually exclusive, clearing STAG offload settings\n"); 5721 features &= ~(NETIF_F_HW_VLAN_STAG_RX | 5722 NETIF_F_HW_VLAN_STAG_TX); 5723 } 5724 5725 return features; 5726 } 5727 5728 /** 5729 * ice_set_vlan_offload_features - set VLAN offload features for the PF VSI 5730 * @vsi: PF's VSI 5731 * @features: features used to determine VLAN offload settings 5732 * 5733 * First, determine the vlan_ethertype based on the VLAN offload bits in 5734 * features. Then determine if stripping and insertion should be enabled or 5735 * disabled. Finally enable or disable VLAN stripping and insertion. 5736 */ 5737 static int 5738 ice_set_vlan_offload_features(struct ice_vsi *vsi, netdev_features_t features) 5739 { 5740 bool enable_stripping = true, enable_insertion = true; 5741 struct ice_vsi_vlan_ops *vlan_ops; 5742 int strip_err = 0, insert_err = 0; 5743 u16 vlan_ethertype = 0; 5744 5745 vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); 5746 5747 if (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX)) 5748 vlan_ethertype = ETH_P_8021AD; 5749 else if (features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX)) 5750 vlan_ethertype = ETH_P_8021Q; 5751 5752 if (!(features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_CTAG_RX))) 5753 enable_stripping = false; 5754 if (!(features & (NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_CTAG_TX))) 5755 enable_insertion = false; 5756 5757 if (enable_stripping) 5758 strip_err = vlan_ops->ena_stripping(vsi, vlan_ethertype); 5759 else 5760 strip_err = vlan_ops->dis_stripping(vsi); 5761 5762 if (enable_insertion) 5763 insert_err = vlan_ops->ena_insertion(vsi, vlan_ethertype); 5764 else 5765 insert_err = vlan_ops->dis_insertion(vsi); 5766 5767 if (strip_err || insert_err) 5768 return -EIO; 5769 5770 return 0; 5771 } 5772 5773 /** 5774 * ice_set_vlan_filtering_features - set VLAN filtering features for the PF VSI 5775 * @vsi: PF's VSI 5776 * @features: features used to determine VLAN filtering settings 5777 * 5778 * Enable or disable Rx VLAN filtering based on the VLAN filtering bits in the 5779 * features. 
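 *
 * Note: ice_fix_features() ensures that the CTAG and STAG filtering bits are
 * requested and cleared together, so checking for either bit here is enough
 * to decide whether Rx VLAN filtering should be enabled.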
5780 */ 5781 static int 5782 ice_set_vlan_filtering_features(struct ice_vsi *vsi, netdev_features_t features) 5783 { 5784 struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); 5785 int err = 0; 5786 5787 /* support Single VLAN Mode (SVM) and Double VLAN Mode (DVM) by checking 5788 * if either bit is set 5789 */ 5790 if (features & 5791 (NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)) 5792 err = vlan_ops->ena_rx_filtering(vsi); 5793 else 5794 err = vlan_ops->dis_rx_filtering(vsi); 5795 5796 return err; 5797 } 5798 5799 /** 5800 * ice_set_vlan_features - set VLAN settings based on suggested feature set 5801 * @netdev: ptr to the netdev being adjusted 5802 * @features: the feature set that the stack is suggesting 5803 * 5804 * Only update VLAN settings if the requested_vlan_features are different than 5805 * the current_vlan_features. 5806 */ 5807 static int 5808 ice_set_vlan_features(struct net_device *netdev, netdev_features_t features) 5809 { 5810 netdev_features_t current_vlan_features, requested_vlan_features; 5811 struct ice_netdev_priv *np = netdev_priv(netdev); 5812 struct ice_vsi *vsi = np->vsi; 5813 int err; 5814 5815 current_vlan_features = netdev->features & NETIF_VLAN_OFFLOAD_FEATURES; 5816 requested_vlan_features = features & NETIF_VLAN_OFFLOAD_FEATURES; 5817 if (current_vlan_features ^ requested_vlan_features) { 5818 err = ice_set_vlan_offload_features(vsi, features); 5819 if (err) 5820 return err; 5821 } 5822 5823 current_vlan_features = netdev->features & 5824 NETIF_VLAN_FILTERING_FEATURES; 5825 requested_vlan_features = features & NETIF_VLAN_FILTERING_FEATURES; 5826 if (current_vlan_features ^ requested_vlan_features) { 5827 err = ice_set_vlan_filtering_features(vsi, features); 5828 if (err) 5829 return err; 5830 } 5831 5832 return 0; 5833 } 5834 5835 /** 5836 * ice_set_features - set the netdev feature flags 5837 * @netdev: ptr to the netdev being adjusted 5838 * @features: the feature set that the stack is suggesting 5839 */ 5840 static int 5841 ice_set_features(struct net_device *netdev, netdev_features_t features) 5842 { 5843 struct ice_netdev_priv *np = netdev_priv(netdev); 5844 struct ice_vsi *vsi = np->vsi; 5845 struct ice_pf *pf = vsi->back; 5846 int ret = 0; 5847 5848 /* Don't set any netdev advanced features with device in Safe Mode */ 5849 if (ice_is_safe_mode(vsi->back)) { 5850 dev_err(ice_pf_to_dev(vsi->back), "Device is in Safe Mode - not enabling advanced netdev features\n"); 5851 return ret; 5852 } 5853 5854 /* Do not change setting during reset */ 5855 if (ice_is_reset_in_progress(pf->state)) { 5856 dev_err(ice_pf_to_dev(vsi->back), "Device is resetting, changing advanced netdev features temporarily unavailable.\n"); 5857 return -EBUSY; 5858 } 5859 5860 /* Multiple features can be changed in one call so keep features in 5861 * separate if/else statements to guarantee each feature is checked 5862 */ 5863 if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH)) 5864 ice_vsi_manage_rss_lut(vsi, true); 5865 else if (!(features & NETIF_F_RXHASH) && 5866 netdev->features & NETIF_F_RXHASH) 5867 ice_vsi_manage_rss_lut(vsi, false); 5868 5869 ret = ice_set_vlan_features(netdev, features); 5870 if (ret) 5871 return ret; 5872 5873 if ((features & NETIF_F_NTUPLE) && 5874 !(netdev->features & NETIF_F_NTUPLE)) { 5875 ice_vsi_manage_fdir(vsi, true); 5876 ice_init_arfs(vsi); 5877 } else if (!(features & NETIF_F_NTUPLE) && 5878 (netdev->features & NETIF_F_NTUPLE)) { 5879 ice_vsi_manage_fdir(vsi, false); 5880 ice_clear_arfs(vsi); 5881 
} 5882 5883 /* don't turn off hw_tc_offload when ADQ is already enabled */ 5884 if (!(features & NETIF_F_HW_TC) && ice_is_adq_active(pf)) { 5885 dev_err(ice_pf_to_dev(pf), "ADQ is active, can't turn hw_tc_offload off\n"); 5886 return -EACCES; 5887 } 5888 5889 if ((features & NETIF_F_HW_TC) && 5890 !(netdev->features & NETIF_F_HW_TC)) 5891 set_bit(ICE_FLAG_CLS_FLOWER, pf->flags); 5892 else 5893 clear_bit(ICE_FLAG_CLS_FLOWER, pf->flags); 5894 5895 return 0; 5896 } 5897 5898 /** 5899 * ice_vsi_vlan_setup - Setup VLAN offload properties on a PF VSI 5900 * @vsi: VSI to setup VLAN properties for 5901 */ 5902 static int ice_vsi_vlan_setup(struct ice_vsi *vsi) 5903 { 5904 int err; 5905 5906 err = ice_set_vlan_offload_features(vsi, vsi->netdev->features); 5907 if (err) 5908 return err; 5909 5910 err = ice_set_vlan_filtering_features(vsi, vsi->netdev->features); 5911 if (err) 5912 return err; 5913 5914 return ice_vsi_add_vlan_zero(vsi); 5915 } 5916 5917 /** 5918 * ice_vsi_cfg - Setup the VSI 5919 * @vsi: the VSI being configured 5920 * 5921 * Return 0 on success and negative value on error 5922 */ 5923 int ice_vsi_cfg(struct ice_vsi *vsi) 5924 { 5925 int err; 5926 5927 if (vsi->netdev) { 5928 ice_set_rx_mode(vsi->netdev); 5929 5930 err = ice_vsi_vlan_setup(vsi); 5931 5932 if (err) 5933 return err; 5934 } 5935 ice_vsi_cfg_dcb_rings(vsi); 5936 5937 err = ice_vsi_cfg_lan_txqs(vsi); 5938 if (!err && ice_is_xdp_ena_vsi(vsi)) 5939 err = ice_vsi_cfg_xdp_txqs(vsi); 5940 if (!err) 5941 err = ice_vsi_cfg_rxqs(vsi); 5942 5943 return err; 5944 } 5945 5946 /* THEORY OF MODERATION: 5947 * The ice driver hardware works differently than the hardware that DIMLIB was 5948 * originally made for. ice hardware doesn't have packet count limits that 5949 * can trigger an interrupt, but it *does* have interrupt rate limit support, 5950 * which is hard-coded to a limit of 250,000 ints/second. 5951 * If not using dynamic moderation, the INTRL value can be modified 5952 * by ethtool rx-usecs-high. 5953 */ 5954 struct ice_dim { 5955 /* the throttle rate for interrupts, basically worst case delay before 5956 * an initial interrupt fires, value is stored in microseconds. 5957 */ 5958 u16 itr; 5959 }; 5960 5961 /* Make a different profile for Rx that doesn't allow quite so aggressive 5962 * moderation at the high end (it maxes out at 126us or about 8k interrupts a 5963 * second. 
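 *
 * As a rough guide (with the ITR value expressed in microseconds), the
 * interrupt rate is approximately 1,000,000 / ITR per second: 2 us gives
 * ~500,000 ints/s (capped at 250,000 by INTRL), 62 us gives ~16,129 ints/s,
 * and 126 us gives ~7,936 ints/s, which is where the table values below
 * come from.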
5964 */ 5965 static const struct ice_dim rx_profile[] = { 5966 {2}, /* 500,000 ints/s, capped at 250K by INTRL */ 5967 {8}, /* 125,000 ints/s */ 5968 {16}, /* 62,500 ints/s */ 5969 {62}, /* 16,129 ints/s */ 5970 {126} /* 7,936 ints/s */ 5971 }; 5972 5973 /* The transmit profile, which has the same sorts of values 5974 * as the previous struct 5975 */ 5976 static const struct ice_dim tx_profile[] = { 5977 {2}, /* 500,000 ints/s, capped at 250K by INTRL */ 5978 {8}, /* 125,000 ints/s */ 5979 {40}, /* 16,125 ints/s */ 5980 {128}, /* 7,812 ints/s */ 5981 {256} /* 3,906 ints/s */ 5982 }; 5983 5984 static void ice_tx_dim_work(struct work_struct *work) 5985 { 5986 struct ice_ring_container *rc; 5987 struct dim *dim; 5988 u16 itr; 5989 5990 dim = container_of(work, struct dim, work); 5991 rc = (struct ice_ring_container *)dim->priv; 5992 5993 WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile)); 5994 5995 /* look up the values in our local table */ 5996 itr = tx_profile[dim->profile_ix].itr; 5997 5998 ice_trace(tx_dim_work, container_of(rc, struct ice_q_vector, tx), dim); 5999 ice_write_itr(rc, itr); 6000 6001 dim->state = DIM_START_MEASURE; 6002 } 6003 6004 static void ice_rx_dim_work(struct work_struct *work) 6005 { 6006 struct ice_ring_container *rc; 6007 struct dim *dim; 6008 u16 itr; 6009 6010 dim = container_of(work, struct dim, work); 6011 rc = (struct ice_ring_container *)dim->priv; 6012 6013 WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile)); 6014 6015 /* look up the values in our local table */ 6016 itr = rx_profile[dim->profile_ix].itr; 6017 6018 ice_trace(rx_dim_work, container_of(rc, struct ice_q_vector, rx), dim); 6019 ice_write_itr(rc, itr); 6020 6021 dim->state = DIM_START_MEASURE; 6022 } 6023 6024 #define ICE_DIM_DEFAULT_PROFILE_IX 1 6025 6026 /** 6027 * ice_init_moderation - set up interrupt moderation 6028 * @q_vector: the vector containing rings to be configured 6029 * 6030 * Set up interrupt moderation registers, with the intent to do the right thing 6031 * when called from reset or from probe, and whether or not dynamic moderation 6032 * is enabled or not. Take special care to write all the registers in both 6033 * dynamic moderation mode or not in order to make sure hardware is in a known 6034 * state. 6035 */ 6036 static void ice_init_moderation(struct ice_q_vector *q_vector) 6037 { 6038 struct ice_ring_container *rc; 6039 bool tx_dynamic, rx_dynamic; 6040 6041 rc = &q_vector->tx; 6042 INIT_WORK(&rc->dim.work, ice_tx_dim_work); 6043 rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; 6044 rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX; 6045 rc->dim.priv = rc; 6046 tx_dynamic = ITR_IS_DYNAMIC(rc); 6047 6048 /* set the initial TX ITR to match the above */ 6049 ice_write_itr(rc, tx_dynamic ? 6050 tx_profile[rc->dim.profile_ix].itr : rc->itr_setting); 6051 6052 rc = &q_vector->rx; 6053 INIT_WORK(&rc->dim.work, ice_rx_dim_work); 6054 rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; 6055 rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX; 6056 rc->dim.priv = rc; 6057 rx_dynamic = ITR_IS_DYNAMIC(rc); 6058 6059 /* set the initial RX ITR to match the above */ 6060 ice_write_itr(rc, rx_dynamic ? 
rx_profile[rc->dim.profile_ix].itr : 6061 rc->itr_setting); 6062 6063 ice_set_q_vector_intrl(q_vector); 6064 } 6065 6066 /** 6067 * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI 6068 * @vsi: the VSI being configured 6069 */ 6070 static void ice_napi_enable_all(struct ice_vsi *vsi) 6071 { 6072 int q_idx; 6073 6074 if (!vsi->netdev) 6075 return; 6076 6077 ice_for_each_q_vector(vsi, q_idx) { 6078 struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; 6079 6080 ice_init_moderation(q_vector); 6081 6082 if (q_vector->rx.rx_ring || q_vector->tx.tx_ring) 6083 napi_enable(&q_vector->napi); 6084 } 6085 } 6086 6087 /** 6088 * ice_up_complete - Finish the last steps of bringing up a connection 6089 * @vsi: The VSI being configured 6090 * 6091 * Return 0 on success and negative value on error 6092 */ 6093 static int ice_up_complete(struct ice_vsi *vsi) 6094 { 6095 struct ice_pf *pf = vsi->back; 6096 int err; 6097 6098 ice_vsi_cfg_msix(vsi); 6099 6100 /* Enable only Rx rings, Tx rings were enabled by the FW when the 6101 * Tx queue group list was configured and the context bits were 6102 * programmed using ice_vsi_cfg_txqs 6103 */ 6104 err = ice_vsi_start_all_rx_rings(vsi); 6105 if (err) 6106 return err; 6107 6108 clear_bit(ICE_VSI_DOWN, vsi->state); 6109 ice_napi_enable_all(vsi); 6110 ice_vsi_ena_irq(vsi); 6111 6112 if (vsi->port_info && 6113 (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) && 6114 vsi->netdev) { 6115 ice_print_link_msg(vsi, true); 6116 netif_tx_start_all_queues(vsi->netdev); 6117 netif_carrier_on(vsi->netdev); 6118 if (!ice_is_e810(&pf->hw)) 6119 ice_ptp_link_change(pf, pf->hw.pf_id, true); 6120 } 6121 6122 /* clear this now, and the first stats read will be used as baseline */ 6123 vsi->stat_offsets_loaded = false; 6124 6125 ice_service_task_schedule(pf); 6126 6127 return 0; 6128 } 6129 6130 /** 6131 * ice_up - Bring the connection back up after being down 6132 * @vsi: VSI being configured 6133 */ 6134 int ice_up(struct ice_vsi *vsi) 6135 { 6136 int err; 6137 6138 err = ice_vsi_cfg(vsi); 6139 if (!err) 6140 err = ice_up_complete(vsi); 6141 6142 return err; 6143 } 6144 6145 /** 6146 * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring 6147 * @syncp: pointer to u64_stats_sync 6148 * @stats: stats that pkts and bytes count will be taken from 6149 * @pkts: packets stats counter 6150 * @bytes: bytes stats counter 6151 * 6152 * This function fetches stats from the ring considering the atomic operations 6153 * that needs to be performed to read u64 values in 32 bit machine. 
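 *
 * The u64_stats_fetch_begin_irq()/u64_stats_fetch_retry_irq() loop re-reads
 * the pair if a writer updated the ring counters in the meantime, so pkts and
 * bytes are returned as a consistent snapshot; on 64-bit builds the sync is
 * effectively a no-op and the loop runs once.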
6154 */ 6155 void 6156 ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp, 6157 struct ice_q_stats stats, u64 *pkts, u64 *bytes) 6158 { 6159 unsigned int start; 6160 6161 do { 6162 start = u64_stats_fetch_begin_irq(syncp); 6163 *pkts = stats.pkts; 6164 *bytes = stats.bytes; 6165 } while (u64_stats_fetch_retry_irq(syncp, start)); 6166 } 6167 6168 /** 6169 * ice_update_vsi_tx_ring_stats - Update VSI Tx ring stats counters 6170 * @vsi: the VSI to be updated 6171 * @vsi_stats: the stats struct to be updated 6172 * @rings: rings to work on 6173 * @count: number of rings 6174 */ 6175 static void 6176 ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, 6177 struct rtnl_link_stats64 *vsi_stats, 6178 struct ice_tx_ring **rings, u16 count) 6179 { 6180 u16 i; 6181 6182 for (i = 0; i < count; i++) { 6183 struct ice_tx_ring *ring; 6184 u64 pkts = 0, bytes = 0; 6185 6186 ring = READ_ONCE(rings[i]); 6187 if (!ring) 6188 continue; 6189 ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); 6190 vsi_stats->tx_packets += pkts; 6191 vsi_stats->tx_bytes += bytes; 6192 vsi->tx_restart += ring->tx_stats.restart_q; 6193 vsi->tx_busy += ring->tx_stats.tx_busy; 6194 vsi->tx_linearize += ring->tx_stats.tx_linearize; 6195 } 6196 } 6197 6198 /** 6199 * ice_update_vsi_ring_stats - Update VSI stats counters 6200 * @vsi: the VSI to be updated 6201 */ 6202 static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) 6203 { 6204 struct rtnl_link_stats64 *vsi_stats; 6205 u64 pkts, bytes; 6206 int i; 6207 6208 vsi_stats = kzalloc(sizeof(*vsi_stats), GFP_ATOMIC); 6209 if (!vsi_stats) 6210 return; 6211 6212 /* reset non-netdev (extended) stats */ 6213 vsi->tx_restart = 0; 6214 vsi->tx_busy = 0; 6215 vsi->tx_linearize = 0; 6216 vsi->rx_buf_failed = 0; 6217 vsi->rx_page_failed = 0; 6218 6219 rcu_read_lock(); 6220 6221 /* update Tx rings counters */ 6222 ice_update_vsi_tx_ring_stats(vsi, vsi_stats, vsi->tx_rings, 6223 vsi->num_txq); 6224 6225 /* update Rx rings counters */ 6226 ice_for_each_rxq(vsi, i) { 6227 struct ice_rx_ring *ring = READ_ONCE(vsi->rx_rings[i]); 6228 6229 ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); 6230 vsi_stats->rx_packets += pkts; 6231 vsi_stats->rx_bytes += bytes; 6232 vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed; 6233 vsi->rx_page_failed += ring->rx_stats.alloc_page_failed; 6234 } 6235 6236 /* update XDP Tx rings counters */ 6237 if (ice_is_xdp_ena_vsi(vsi)) 6238 ice_update_vsi_tx_ring_stats(vsi, vsi_stats, vsi->xdp_rings, 6239 vsi->num_xdp_txq); 6240 6241 rcu_read_unlock(); 6242 6243 vsi->net_stats.tx_packets = vsi_stats->tx_packets; 6244 vsi->net_stats.tx_bytes = vsi_stats->tx_bytes; 6245 vsi->net_stats.rx_packets = vsi_stats->rx_packets; 6246 vsi->net_stats.rx_bytes = vsi_stats->rx_bytes; 6247 6248 kfree(vsi_stats); 6249 } 6250 6251 /** 6252 * ice_update_vsi_stats - Update VSI stats counters 6253 * @vsi: the VSI to be updated 6254 */ 6255 void ice_update_vsi_stats(struct ice_vsi *vsi) 6256 { 6257 struct rtnl_link_stats64 *cur_ns = &vsi->net_stats; 6258 struct ice_eth_stats *cur_es = &vsi->eth_stats; 6259 struct ice_pf *pf = vsi->back; 6260 6261 if (test_bit(ICE_VSI_DOWN, vsi->state) || 6262 test_bit(ICE_CFG_BUSY, pf->state)) 6263 return; 6264 6265 /* get stats as recorded by Tx/Rx rings */ 6266 ice_update_vsi_ring_stats(vsi); 6267 6268 /* get VSI stats as recorded by the hardware */ 6269 ice_update_eth_stats(vsi); 6270 6271 cur_ns->tx_errors = cur_es->tx_errors; 6272 cur_ns->rx_dropped = cur_es->rx_discards; 6273 cur_ns->tx_dropped = 
cur_es->tx_discards; 6274 cur_ns->multicast = cur_es->rx_multicast; 6275 6276 /* update some more netdev stats if this is main VSI */ 6277 if (vsi->type == ICE_VSI_PF) { 6278 cur_ns->rx_crc_errors = pf->stats.crc_errors; 6279 cur_ns->rx_errors = pf->stats.crc_errors + 6280 pf->stats.illegal_bytes + 6281 pf->stats.rx_len_errors + 6282 pf->stats.rx_undersize + 6283 pf->hw_csum_rx_error + 6284 pf->stats.rx_jabber + 6285 pf->stats.rx_fragments + 6286 pf->stats.rx_oversize; 6287 cur_ns->rx_length_errors = pf->stats.rx_len_errors; 6288 /* record drops from the port level */ 6289 cur_ns->rx_missed_errors = pf->stats.eth.rx_discards; 6290 } 6291 } 6292 6293 /** 6294 * ice_update_pf_stats - Update PF port stats counters 6295 * @pf: PF whose stats needs to be updated 6296 */ 6297 void ice_update_pf_stats(struct ice_pf *pf) 6298 { 6299 struct ice_hw_port_stats *prev_ps, *cur_ps; 6300 struct ice_hw *hw = &pf->hw; 6301 u16 fd_ctr_base; 6302 u8 port; 6303 6304 port = hw->port_info->lport; 6305 prev_ps = &pf->stats_prev; 6306 cur_ps = &pf->stats; 6307 6308 ice_stat_update40(hw, GLPRT_GORCL(port), pf->stat_prev_loaded, 6309 &prev_ps->eth.rx_bytes, 6310 &cur_ps->eth.rx_bytes); 6311 6312 ice_stat_update40(hw, GLPRT_UPRCL(port), pf->stat_prev_loaded, 6313 &prev_ps->eth.rx_unicast, 6314 &cur_ps->eth.rx_unicast); 6315 6316 ice_stat_update40(hw, GLPRT_MPRCL(port), pf->stat_prev_loaded, 6317 &prev_ps->eth.rx_multicast, 6318 &cur_ps->eth.rx_multicast); 6319 6320 ice_stat_update40(hw, GLPRT_BPRCL(port), pf->stat_prev_loaded, 6321 &prev_ps->eth.rx_broadcast, 6322 &cur_ps->eth.rx_broadcast); 6323 6324 ice_stat_update32(hw, PRTRPB_RDPC, pf->stat_prev_loaded, 6325 &prev_ps->eth.rx_discards, 6326 &cur_ps->eth.rx_discards); 6327 6328 ice_stat_update40(hw, GLPRT_GOTCL(port), pf->stat_prev_loaded, 6329 &prev_ps->eth.tx_bytes, 6330 &cur_ps->eth.tx_bytes); 6331 6332 ice_stat_update40(hw, GLPRT_UPTCL(port), pf->stat_prev_loaded, 6333 &prev_ps->eth.tx_unicast, 6334 &cur_ps->eth.tx_unicast); 6335 6336 ice_stat_update40(hw, GLPRT_MPTCL(port), pf->stat_prev_loaded, 6337 &prev_ps->eth.tx_multicast, 6338 &cur_ps->eth.tx_multicast); 6339 6340 ice_stat_update40(hw, GLPRT_BPTCL(port), pf->stat_prev_loaded, 6341 &prev_ps->eth.tx_broadcast, 6342 &cur_ps->eth.tx_broadcast); 6343 6344 ice_stat_update32(hw, GLPRT_TDOLD(port), pf->stat_prev_loaded, 6345 &prev_ps->tx_dropped_link_down, 6346 &cur_ps->tx_dropped_link_down); 6347 6348 ice_stat_update40(hw, GLPRT_PRC64L(port), pf->stat_prev_loaded, 6349 &prev_ps->rx_size_64, &cur_ps->rx_size_64); 6350 6351 ice_stat_update40(hw, GLPRT_PRC127L(port), pf->stat_prev_loaded, 6352 &prev_ps->rx_size_127, &cur_ps->rx_size_127); 6353 6354 ice_stat_update40(hw, GLPRT_PRC255L(port), pf->stat_prev_loaded, 6355 &prev_ps->rx_size_255, &cur_ps->rx_size_255); 6356 6357 ice_stat_update40(hw, GLPRT_PRC511L(port), pf->stat_prev_loaded, 6358 &prev_ps->rx_size_511, &cur_ps->rx_size_511); 6359 6360 ice_stat_update40(hw, GLPRT_PRC1023L(port), pf->stat_prev_loaded, 6361 &prev_ps->rx_size_1023, &cur_ps->rx_size_1023); 6362 6363 ice_stat_update40(hw, GLPRT_PRC1522L(port), pf->stat_prev_loaded, 6364 &prev_ps->rx_size_1522, &cur_ps->rx_size_1522); 6365 6366 ice_stat_update40(hw, GLPRT_PRC9522L(port), pf->stat_prev_loaded, 6367 &prev_ps->rx_size_big, &cur_ps->rx_size_big); 6368 6369 ice_stat_update40(hw, GLPRT_PTC64L(port), pf->stat_prev_loaded, 6370 &prev_ps->tx_size_64, &cur_ps->tx_size_64); 6371 6372 ice_stat_update40(hw, GLPRT_PTC127L(port), pf->stat_prev_loaded, 6373 &prev_ps->tx_size_127, &cur_ps->tx_size_127); 6374 
6375 ice_stat_update40(hw, GLPRT_PTC255L(port), pf->stat_prev_loaded, 6376 &prev_ps->tx_size_255, &cur_ps->tx_size_255); 6377 6378 ice_stat_update40(hw, GLPRT_PTC511L(port), pf->stat_prev_loaded, 6379 &prev_ps->tx_size_511, &cur_ps->tx_size_511); 6380 6381 ice_stat_update40(hw, GLPRT_PTC1023L(port), pf->stat_prev_loaded, 6382 &prev_ps->tx_size_1023, &cur_ps->tx_size_1023); 6383 6384 ice_stat_update40(hw, GLPRT_PTC1522L(port), pf->stat_prev_loaded, 6385 &prev_ps->tx_size_1522, &cur_ps->tx_size_1522); 6386 6387 ice_stat_update40(hw, GLPRT_PTC9522L(port), pf->stat_prev_loaded, 6388 &prev_ps->tx_size_big, &cur_ps->tx_size_big); 6389 6390 fd_ctr_base = hw->fd_ctr_base; 6391 6392 ice_stat_update40(hw, 6393 GLSTAT_FD_CNT0L(ICE_FD_SB_STAT_IDX(fd_ctr_base)), 6394 pf->stat_prev_loaded, &prev_ps->fd_sb_match, 6395 &cur_ps->fd_sb_match); 6396 ice_stat_update32(hw, GLPRT_LXONRXC(port), pf->stat_prev_loaded, 6397 &prev_ps->link_xon_rx, &cur_ps->link_xon_rx); 6398 6399 ice_stat_update32(hw, GLPRT_LXOFFRXC(port), pf->stat_prev_loaded, 6400 &prev_ps->link_xoff_rx, &cur_ps->link_xoff_rx); 6401 6402 ice_stat_update32(hw, GLPRT_LXONTXC(port), pf->stat_prev_loaded, 6403 &prev_ps->link_xon_tx, &cur_ps->link_xon_tx); 6404 6405 ice_stat_update32(hw, GLPRT_LXOFFTXC(port), pf->stat_prev_loaded, 6406 &prev_ps->link_xoff_tx, &cur_ps->link_xoff_tx); 6407 6408 ice_update_dcb_stats(pf); 6409 6410 ice_stat_update32(hw, GLPRT_CRCERRS(port), pf->stat_prev_loaded, 6411 &prev_ps->crc_errors, &cur_ps->crc_errors); 6412 6413 ice_stat_update32(hw, GLPRT_ILLERRC(port), pf->stat_prev_loaded, 6414 &prev_ps->illegal_bytes, &cur_ps->illegal_bytes); 6415 6416 ice_stat_update32(hw, GLPRT_MLFC(port), pf->stat_prev_loaded, 6417 &prev_ps->mac_local_faults, 6418 &cur_ps->mac_local_faults); 6419 6420 ice_stat_update32(hw, GLPRT_MRFC(port), pf->stat_prev_loaded, 6421 &prev_ps->mac_remote_faults, 6422 &cur_ps->mac_remote_faults); 6423 6424 ice_stat_update32(hw, GLPRT_RLEC(port), pf->stat_prev_loaded, 6425 &prev_ps->rx_len_errors, &cur_ps->rx_len_errors); 6426 6427 ice_stat_update32(hw, GLPRT_RUC(port), pf->stat_prev_loaded, 6428 &prev_ps->rx_undersize, &cur_ps->rx_undersize); 6429 6430 ice_stat_update32(hw, GLPRT_RFC(port), pf->stat_prev_loaded, 6431 &prev_ps->rx_fragments, &cur_ps->rx_fragments); 6432 6433 ice_stat_update32(hw, GLPRT_ROC(port), pf->stat_prev_loaded, 6434 &prev_ps->rx_oversize, &cur_ps->rx_oversize); 6435 6436 ice_stat_update32(hw, GLPRT_RJC(port), pf->stat_prev_loaded, 6437 &prev_ps->rx_jabber, &cur_ps->rx_jabber); 6438 6439 cur_ps->fd_sb_status = test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1 : 0; 6440 6441 pf->stat_prev_loaded = true; 6442 } 6443 6444 /** 6445 * ice_get_stats64 - get statistics for network device structure 6446 * @netdev: network interface device structure 6447 * @stats: main device statistics structure 6448 */ 6449 static 6450 void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) 6451 { 6452 struct ice_netdev_priv *np = netdev_priv(netdev); 6453 struct rtnl_link_stats64 *vsi_stats; 6454 struct ice_vsi *vsi = np->vsi; 6455 6456 vsi_stats = &vsi->net_stats; 6457 6458 if (!vsi->num_txq || !vsi->num_rxq) 6459 return; 6460 6461 /* netdev packet/byte stats come from ring counter. These are obtained 6462 * by summing up ring counters (done by ice_update_vsi_ring_stats). 6463 * But, only call the update routine and read the registers if VSI is 6464 * not down. 
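	 * A VSI that is down therefore reports the last snapshot taken before
	 * it went down rather than zeroed counters.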
6465 */ 6466 if (!test_bit(ICE_VSI_DOWN, vsi->state)) 6467 ice_update_vsi_ring_stats(vsi); 6468 stats->tx_packets = vsi_stats->tx_packets; 6469 stats->tx_bytes = vsi_stats->tx_bytes; 6470 stats->rx_packets = vsi_stats->rx_packets; 6471 stats->rx_bytes = vsi_stats->rx_bytes; 6472 6473 /* The rest of the stats can be read from the hardware but instead we 6474 * just return values that the watchdog task has already obtained from 6475 * the hardware. 6476 */ 6477 stats->multicast = vsi_stats->multicast; 6478 stats->tx_errors = vsi_stats->tx_errors; 6479 stats->tx_dropped = vsi_stats->tx_dropped; 6480 stats->rx_errors = vsi_stats->rx_errors; 6481 stats->rx_dropped = vsi_stats->rx_dropped; 6482 stats->rx_crc_errors = vsi_stats->rx_crc_errors; 6483 stats->rx_length_errors = vsi_stats->rx_length_errors; 6484 } 6485 6486 /** 6487 * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI 6488 * @vsi: VSI having NAPI disabled 6489 */ 6490 static void ice_napi_disable_all(struct ice_vsi *vsi) 6491 { 6492 int q_idx; 6493 6494 if (!vsi->netdev) 6495 return; 6496 6497 ice_for_each_q_vector(vsi, q_idx) { 6498 struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; 6499 6500 if (q_vector->rx.rx_ring || q_vector->tx.tx_ring) 6501 napi_disable(&q_vector->napi); 6502 6503 cancel_work_sync(&q_vector->tx.dim.work); 6504 cancel_work_sync(&q_vector->rx.dim.work); 6505 } 6506 } 6507 6508 /** 6509 * ice_down - Shutdown the connection 6510 * @vsi: The VSI being stopped 6511 * 6512 * Caller of this function is expected to set the vsi->state ICE_DOWN bit 6513 */ 6514 int ice_down(struct ice_vsi *vsi) 6515 { 6516 int i, tx_err, rx_err, link_err = 0, vlan_err = 0; 6517 6518 WARN_ON(!test_bit(ICE_VSI_DOWN, vsi->state)); 6519 6520 if (vsi->netdev && vsi->type == ICE_VSI_PF) { 6521 vlan_err = ice_vsi_del_vlan_zero(vsi); 6522 if (!ice_is_e810(&vsi->back->hw)) 6523 ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); 6524 netif_carrier_off(vsi->netdev); 6525 netif_tx_disable(vsi->netdev); 6526 } else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) { 6527 ice_eswitch_stop_all_tx_queues(vsi->back); 6528 } 6529 6530 ice_vsi_dis_irq(vsi); 6531 6532 tx_err = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0); 6533 if (tx_err) 6534 netdev_err(vsi->netdev, "Failed stop Tx rings, VSI %d error %d\n", 6535 vsi->vsi_num, tx_err); 6536 if (!tx_err && ice_is_xdp_ena_vsi(vsi)) { 6537 tx_err = ice_vsi_stop_xdp_tx_rings(vsi); 6538 if (tx_err) 6539 netdev_err(vsi->netdev, "Failed stop XDP rings, VSI %d error %d\n", 6540 vsi->vsi_num, tx_err); 6541 } 6542 6543 rx_err = ice_vsi_stop_all_rx_rings(vsi); 6544 if (rx_err) 6545 netdev_err(vsi->netdev, "Failed stop Rx rings, VSI %d error %d\n", 6546 vsi->vsi_num, rx_err); 6547 6548 ice_napi_disable_all(vsi); 6549 6550 if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) { 6551 link_err = ice_force_phys_link_state(vsi, false); 6552 if (link_err) 6553 netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n", 6554 vsi->vsi_num, link_err); 6555 } 6556 6557 ice_for_each_txq(vsi, i) 6558 ice_clean_tx_ring(vsi->tx_rings[i]); 6559 6560 ice_for_each_rxq(vsi, i) 6561 ice_clean_rx_ring(vsi->rx_rings[i]); 6562 6563 if (tx_err || rx_err || link_err || vlan_err) { 6564 netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n", 6565 vsi->vsi_num, vsi->vsw->sw_id); 6566 return -EIO; 6567 } 6568 6569 return 0; 6570 } 6571 6572 /** 6573 * ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources 6574 * @vsi: VSI having resources allocated 6575 * 6576 * Return 0 on 
success, negative on failure 6577 */ 6578 int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) 6579 { 6580 int i, err = 0; 6581 6582 if (!vsi->num_txq) { 6583 dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Tx queues\n", 6584 vsi->vsi_num); 6585 return -EINVAL; 6586 } 6587 6588 ice_for_each_txq(vsi, i) { 6589 struct ice_tx_ring *ring = vsi->tx_rings[i]; 6590 6591 if (!ring) 6592 return -EINVAL; 6593 6594 if (vsi->netdev) 6595 ring->netdev = vsi->netdev; 6596 err = ice_setup_tx_ring(ring); 6597 if (err) 6598 break; 6599 } 6600 6601 return err; 6602 } 6603 6604 /** 6605 * ice_vsi_setup_rx_rings - Allocate VSI Rx queue resources 6606 * @vsi: VSI having resources allocated 6607 * 6608 * Return 0 on success, negative on failure 6609 */ 6610 int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) 6611 { 6612 int i, err = 0; 6613 6614 if (!vsi->num_rxq) { 6615 dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Rx queues\n", 6616 vsi->vsi_num); 6617 return -EINVAL; 6618 } 6619 6620 ice_for_each_rxq(vsi, i) { 6621 struct ice_rx_ring *ring = vsi->rx_rings[i]; 6622 6623 if (!ring) 6624 return -EINVAL; 6625 6626 if (vsi->netdev) 6627 ring->netdev = vsi->netdev; 6628 err = ice_setup_rx_ring(ring); 6629 if (err) 6630 break; 6631 } 6632 6633 return err; 6634 } 6635 6636 /** 6637 * ice_vsi_open_ctrl - open control VSI for use 6638 * @vsi: the VSI to open 6639 * 6640 * Initialization of the Control VSI 6641 * 6642 * Returns 0 on success, negative value on error 6643 */ 6644 int ice_vsi_open_ctrl(struct ice_vsi *vsi) 6645 { 6646 char int_name[ICE_INT_NAME_STR_LEN]; 6647 struct ice_pf *pf = vsi->back; 6648 struct device *dev; 6649 int err; 6650 6651 dev = ice_pf_to_dev(pf); 6652 /* allocate descriptors */ 6653 err = ice_vsi_setup_tx_rings(vsi); 6654 if (err) 6655 goto err_setup_tx; 6656 6657 err = ice_vsi_setup_rx_rings(vsi); 6658 if (err) 6659 goto err_setup_rx; 6660 6661 err = ice_vsi_cfg(vsi); 6662 if (err) 6663 goto err_setup_rx; 6664 6665 snprintf(int_name, sizeof(int_name) - 1, "%s-%s:ctrl", 6666 dev_driver_string(dev), dev_name(dev)); 6667 err = ice_vsi_req_irq_msix(vsi, int_name); 6668 if (err) 6669 goto err_setup_rx; 6670 6671 ice_vsi_cfg_msix(vsi); 6672 6673 err = ice_vsi_start_all_rx_rings(vsi); 6674 if (err) 6675 goto err_up_complete; 6676 6677 clear_bit(ICE_VSI_DOWN, vsi->state); 6678 ice_vsi_ena_irq(vsi); 6679 6680 return 0; 6681 6682 err_up_complete: 6683 ice_down(vsi); 6684 err_setup_rx: 6685 ice_vsi_free_rx_rings(vsi); 6686 err_setup_tx: 6687 ice_vsi_free_tx_rings(vsi); 6688 6689 return err; 6690 } 6691 6692 /** 6693 * ice_vsi_open - Called when a network interface is made active 6694 * @vsi: the VSI to open 6695 * 6696 * Initialization of the VSI 6697 * 6698 * Returns 0 on success, negative value on error 6699 */ 6700 int ice_vsi_open(struct ice_vsi *vsi) 6701 { 6702 char int_name[ICE_INT_NAME_STR_LEN]; 6703 struct ice_pf *pf = vsi->back; 6704 int err; 6705 6706 /* allocate descriptors */ 6707 err = ice_vsi_setup_tx_rings(vsi); 6708 if (err) 6709 goto err_setup_tx; 6710 6711 err = ice_vsi_setup_rx_rings(vsi); 6712 if (err) 6713 goto err_setup_rx; 6714 6715 err = ice_vsi_cfg(vsi); 6716 if (err) 6717 goto err_setup_rx; 6718 6719 snprintf(int_name, sizeof(int_name) - 1, "%s-%s", 6720 dev_driver_string(ice_pf_to_dev(pf)), vsi->netdev->name); 6721 err = ice_vsi_req_irq_msix(vsi, int_name); 6722 if (err) 6723 goto err_setup_rx; 6724 6725 if (vsi->type == ICE_VSI_PF) { 6726 /* Notify the stack of the actual queue counts. 
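		 * netif_set_real_num_tx_queues()/netif_set_real_num_rx_queues()
		 * below cap the queue range the stack may select, so packets
		 * are never queued to a Tx/Rx queue the VSI did not allocate.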
*/ 6727 err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq); 6728 if (err) 6729 goto err_set_qs; 6730 6731 err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq); 6732 if (err) 6733 goto err_set_qs; 6734 } 6735 6736 err = ice_up_complete(vsi); 6737 if (err) 6738 goto err_up_complete; 6739 6740 return 0; 6741 6742 err_up_complete: 6743 ice_down(vsi); 6744 err_set_qs: 6745 ice_vsi_free_irq(vsi); 6746 err_setup_rx: 6747 ice_vsi_free_rx_rings(vsi); 6748 err_setup_tx: 6749 ice_vsi_free_tx_rings(vsi); 6750 6751 return err; 6752 } 6753 6754 /** 6755 * ice_vsi_release_all - Delete all VSIs 6756 * @pf: PF from which all VSIs are being removed 6757 */ 6758 static void ice_vsi_release_all(struct ice_pf *pf) 6759 { 6760 int err, i; 6761 6762 if (!pf->vsi) 6763 return; 6764 6765 ice_for_each_vsi(pf, i) { 6766 if (!pf->vsi[i]) 6767 continue; 6768 6769 if (pf->vsi[i]->type == ICE_VSI_CHNL) 6770 continue; 6771 6772 err = ice_vsi_release(pf->vsi[i]); 6773 if (err) 6774 dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n", 6775 i, err, pf->vsi[i]->vsi_num); 6776 } 6777 } 6778 6779 /** 6780 * ice_vsi_rebuild_by_type - Rebuild VSI of a given type 6781 * @pf: pointer to the PF instance 6782 * @type: VSI type to rebuild 6783 * 6784 * Iterates through the pf->vsi array and rebuilds VSIs of the requested type 6785 */ 6786 static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type) 6787 { 6788 struct device *dev = ice_pf_to_dev(pf); 6789 int i, err; 6790 6791 ice_for_each_vsi(pf, i) { 6792 struct ice_vsi *vsi = pf->vsi[i]; 6793 6794 if (!vsi || vsi->type != type) 6795 continue; 6796 6797 /* rebuild the VSI */ 6798 err = ice_vsi_rebuild(vsi, true); 6799 if (err) { 6800 dev_err(dev, "rebuild VSI failed, err %d, VSI index %d, type %s\n", 6801 err, vsi->idx, ice_vsi_type_str(type)); 6802 return err; 6803 } 6804 6805 /* replay filters for the VSI */ 6806 err = ice_replay_vsi(&pf->hw, vsi->idx); 6807 if (err) { 6808 dev_err(dev, "replay VSI failed, error %d, VSI index %d, type %s\n", 6809 err, vsi->idx, ice_vsi_type_str(type)); 6810 return err; 6811 } 6812 6813 /* Re-map HW VSI number, using VSI handle that has been 6814 * previously validated in ice_replay_vsi() call above 6815 */ 6816 vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx); 6817 6818 /* enable the VSI */ 6819 err = ice_ena_vsi(vsi, false); 6820 if (err) { 6821 dev_err(dev, "enable VSI failed, err %d, VSI index %d, type %s\n", 6822 err, vsi->idx, ice_vsi_type_str(type)); 6823 return err; 6824 } 6825 6826 dev_info(dev, "VSI rebuilt. 
VSI index %d, type %s\n", vsi->idx, 6827 ice_vsi_type_str(type)); 6828 } 6829 6830 return 0; 6831 } 6832 6833 /** 6834 * ice_update_pf_netdev_link - Update PF netdev link status 6835 * @pf: pointer to the PF instance 6836 */ 6837 static void ice_update_pf_netdev_link(struct ice_pf *pf) 6838 { 6839 bool link_up; 6840 int i; 6841 6842 ice_for_each_vsi(pf, i) { 6843 struct ice_vsi *vsi = pf->vsi[i]; 6844 6845 if (!vsi || vsi->type != ICE_VSI_PF) 6846 return; 6847 6848 ice_get_link_status(pf->vsi[i]->port_info, &link_up); 6849 if (link_up) { 6850 netif_carrier_on(pf->vsi[i]->netdev); 6851 netif_tx_wake_all_queues(pf->vsi[i]->netdev); 6852 } else { 6853 netif_carrier_off(pf->vsi[i]->netdev); 6854 netif_tx_stop_all_queues(pf->vsi[i]->netdev); 6855 } 6856 } 6857 } 6858 6859 /** 6860 * ice_rebuild - rebuild after reset 6861 * @pf: PF to rebuild 6862 * @reset_type: type of reset 6863 * 6864 * Do not rebuild VF VSI in this flow because that is already handled via 6865 * ice_reset_all_vfs(). This is because requirements for resetting a VF after a 6866 * PFR/CORER/GLOBER/etc. are different than the normal flow. Also, we don't want 6867 * to reset/rebuild all the VF VSI twice. 6868 */ 6869 static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) 6870 { 6871 struct device *dev = ice_pf_to_dev(pf); 6872 struct ice_hw *hw = &pf->hw; 6873 bool dvm; 6874 int err; 6875 6876 if (test_bit(ICE_DOWN, pf->state)) 6877 goto clear_recovery; 6878 6879 dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type); 6880 6881 if (reset_type == ICE_RESET_EMPR) { 6882 /* If an EMP reset has occurred, any previously pending flash 6883 * update will have completed. We no longer know whether or 6884 * not the NVM update EMP reset is restricted. 6885 */ 6886 pf->fw_emp_reset_disabled = false; 6887 } 6888 6889 err = ice_init_all_ctrlq(hw); 6890 if (err) { 6891 dev_err(dev, "control queues init failed %d\n", err); 6892 goto err_init_ctrlq; 6893 } 6894 6895 /* if DDP was previously loaded successfully */ 6896 if (!ice_is_safe_mode(pf)) { 6897 /* reload the SW DB of filter tables */ 6898 if (reset_type == ICE_RESET_PFR) 6899 ice_fill_blk_tbls(hw); 6900 else 6901 /* Reload DDP Package after CORER/GLOBR reset */ 6902 ice_load_pkg(NULL, pf); 6903 } 6904 6905 err = ice_clear_pf_cfg(hw); 6906 if (err) { 6907 dev_err(dev, "clear PF configuration failed %d\n", err); 6908 goto err_init_ctrlq; 6909 } 6910 6911 if (pf->first_sw->dflt_vsi_ena) 6912 dev_info(dev, "Clearing default VSI, re-enable after reset completes\n"); 6913 /* clear the default VSI configuration if it exists */ 6914 pf->first_sw->dflt_vsi = NULL; 6915 pf->first_sw->dflt_vsi_ena = false; 6916 6917 ice_clear_pxe_mode(hw); 6918 6919 err = ice_init_nvm(hw); 6920 if (err) { 6921 dev_err(dev, "ice_init_nvm failed %d\n", err); 6922 goto err_init_ctrlq; 6923 } 6924 6925 err = ice_get_caps(hw); 6926 if (err) { 6927 dev_err(dev, "ice_get_caps failed %d\n", err); 6928 goto err_init_ctrlq; 6929 } 6930 6931 err = ice_aq_set_mac_cfg(hw, ICE_AQ_SET_MAC_FRAME_SIZE_MAX, NULL); 6932 if (err) { 6933 dev_err(dev, "set_mac_cfg failed %d\n", err); 6934 goto err_init_ctrlq; 6935 } 6936 6937 dvm = ice_is_dvm_ena(hw); 6938 6939 err = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL); 6940 if (err) 6941 goto err_init_ctrlq; 6942 6943 err = ice_sched_init_port(hw->port_info); 6944 if (err) 6945 goto err_sched_init_port; 6946 6947 /* start misc vector */ 6948 err = ice_req_irq_msix_misc(pf); 6949 if (err) { 6950 dev_err(dev, "misc vector setup failed: %d\n", err); 6951 goto 
err_sched_init_port; 6952 } 6953 6954 if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) { 6955 wr32(hw, PFQF_FD_ENA, PFQF_FD_ENA_FD_ENA_M); 6956 if (!rd32(hw, PFQF_FD_SIZE)) { 6957 u16 unused, guar, b_effort; 6958 6959 guar = hw->func_caps.fd_fltr_guar; 6960 b_effort = hw->func_caps.fd_fltr_best_effort; 6961 6962 /* force guaranteed filter pool for PF */ 6963 ice_alloc_fd_guar_item(hw, &unused, guar); 6964 /* force shared filter pool for PF */ 6965 ice_alloc_fd_shrd_item(hw, &unused, b_effort); 6966 } 6967 } 6968 6969 if (test_bit(ICE_FLAG_DCB_ENA, pf->flags)) 6970 ice_dcb_rebuild(pf); 6971 6972 /* If the PF previously had enabled PTP, PTP init needs to happen before 6973 * the VSI rebuild. If not, this causes the PTP link status events to 6974 * fail. 6975 */ 6976 if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) 6977 ice_ptp_reset(pf); 6978 6979 if (ice_is_feature_supported(pf, ICE_F_GNSS)) 6980 ice_gnss_init(pf); 6981 6982 /* rebuild PF VSI */ 6983 err = ice_vsi_rebuild_by_type(pf, ICE_VSI_PF); 6984 if (err) { 6985 dev_err(dev, "PF VSI rebuild failed: %d\n", err); 6986 goto err_vsi_rebuild; 6987 } 6988 6989 /* configure PTP timestamping after VSI rebuild */ 6990 if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) 6991 ice_ptp_cfg_timestamp(pf, false); 6992 6993 err = ice_vsi_rebuild_by_type(pf, ICE_VSI_SWITCHDEV_CTRL); 6994 if (err) { 6995 dev_err(dev, "Switchdev CTRL VSI rebuild failed: %d\n", err); 6996 goto err_vsi_rebuild; 6997 } 6998 6999 if (reset_type == ICE_RESET_PFR) { 7000 err = ice_rebuild_channels(pf); 7001 if (err) { 7002 dev_err(dev, "failed to rebuild and replay ADQ VSIs, err %d\n", 7003 err); 7004 goto err_vsi_rebuild; 7005 } 7006 } 7007 7008 /* If Flow Director is active */ 7009 if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) { 7010 err = ice_vsi_rebuild_by_type(pf, ICE_VSI_CTRL); 7011 if (err) { 7012 dev_err(dev, "control VSI rebuild failed: %d\n", err); 7013 goto err_vsi_rebuild; 7014 } 7015 7016 /* replay HW Flow Director recipes */ 7017 if (hw->fdir_prof) 7018 ice_fdir_replay_flows(hw); 7019 7020 /* replay Flow Director filters */ 7021 ice_fdir_replay_fltrs(pf); 7022 7023 ice_rebuild_arfs(pf); 7024 } 7025 7026 ice_update_pf_netdev_link(pf); 7027 7028 /* tell the firmware we are up */ 7029 err = ice_send_version(pf); 7030 if (err) { 7031 dev_err(dev, "Rebuild failed due to error sending driver version: %d\n", 7032 err); 7033 goto err_vsi_rebuild; 7034 } 7035 7036 ice_replay_post(hw); 7037 7038 /* if we get here, reset flow is successful */ 7039 clear_bit(ICE_RESET_FAILED, pf->state); 7040 7041 ice_plug_aux_dev(pf); 7042 return; 7043 7044 err_vsi_rebuild: 7045 err_sched_init_port: 7046 ice_sched_cleanup_all(hw); 7047 err_init_ctrlq: 7048 ice_shutdown_all_ctrlq(hw); 7049 set_bit(ICE_RESET_FAILED, pf->state); 7050 clear_recovery: 7051 /* set this bit in PF state to control service task scheduling */ 7052 set_bit(ICE_NEEDS_RESTART, pf->state); 7053 dev_err(dev, "Rebuild failed, unload and reload driver\n"); 7054 } 7055 7056 /** 7057 * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP 7058 * @vsi: Pointer to VSI structure 7059 */ 7060 static int ice_max_xdp_frame_size(struct ice_vsi *vsi) 7061 { 7062 if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) 7063 return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM; 7064 else 7065 return ICE_RXBUF_3072; 7066 } 7067 7068 /** 7069 * ice_change_mtu - NDO callback to change the MTU 7070 * @netdev: network interface device structure 7071 * @new_mtu: new value for maximum frame size 7072 * 7073 * Returns 0 on success, 
negative on failure 7074 */ 7075 static int ice_change_mtu(struct net_device *netdev, int new_mtu) 7076 { 7077 struct ice_netdev_priv *np = netdev_priv(netdev); 7078 struct ice_vsi *vsi = np->vsi; 7079 struct ice_pf *pf = vsi->back; 7080 u8 count = 0; 7081 int err = 0; 7082 7083 if (new_mtu == (int)netdev->mtu) { 7084 netdev_warn(netdev, "MTU is already %u\n", netdev->mtu); 7085 return 0; 7086 } 7087 7088 if (ice_is_xdp_ena_vsi(vsi)) { 7089 int frame_size = ice_max_xdp_frame_size(vsi); 7090 7091 if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) { 7092 netdev_err(netdev, "max MTU for XDP usage is %d\n", 7093 frame_size - ICE_ETH_PKT_HDR_PAD); 7094 return -EINVAL; 7095 } 7096 } 7097 7098 /* if a reset is in progress, wait for some time for it to complete */ 7099 do { 7100 if (ice_is_reset_in_progress(pf->state)) { 7101 count++; 7102 usleep_range(1000, 2000); 7103 } else { 7104 break; 7105 } 7106 7107 } while (count < 100); 7108 7109 if (count == 100) { 7110 netdev_err(netdev, "can't change MTU. Device is busy\n"); 7111 return -EBUSY; 7112 } 7113 7114 netdev->mtu = (unsigned int)new_mtu; 7115 7116 /* if VSI is up, bring it down and then back up */ 7117 if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { 7118 err = ice_down(vsi); 7119 if (err) { 7120 netdev_err(netdev, "change MTU if_down err %d\n", err); 7121 return err; 7122 } 7123 7124 err = ice_up(vsi); 7125 if (err) { 7126 netdev_err(netdev, "change MTU if_up err %d\n", err); 7127 return err; 7128 } 7129 } 7130 7131 netdev_dbg(netdev, "changed MTU to %d\n", new_mtu); 7132 set_bit(ICE_FLAG_MTU_CHANGED, pf->flags); 7133 7134 return err; 7135 } 7136 7137 /** 7138 * ice_eth_ioctl - Access the hwtstamp interface 7139 * @netdev: network interface device structure 7140 * @ifr: interface request data 7141 * @cmd: ioctl command 7142 */ 7143 static int ice_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) 7144 { 7145 struct ice_netdev_priv *np = netdev_priv(netdev); 7146 struct ice_pf *pf = np->vsi->back; 7147 7148 switch (cmd) { 7149 case SIOCGHWTSTAMP: 7150 return ice_ptp_get_ts_config(pf, ifr); 7151 case SIOCSHWTSTAMP: 7152 return ice_ptp_set_ts_config(pf, ifr); 7153 default: 7154 return -EOPNOTSUPP; 7155 } 7156 } 7157 7158 /** 7159 * ice_aq_str - convert AQ err code to a string 7160 * @aq_err: the AQ error code to convert 7161 */ 7162 const char *ice_aq_str(enum ice_aq_err aq_err) 7163 { 7164 switch (aq_err) { 7165 case ICE_AQ_RC_OK: 7166 return "OK"; 7167 case ICE_AQ_RC_EPERM: 7168 return "ICE_AQ_RC_EPERM"; 7169 case ICE_AQ_RC_ENOENT: 7170 return "ICE_AQ_RC_ENOENT"; 7171 case ICE_AQ_RC_ENOMEM: 7172 return "ICE_AQ_RC_ENOMEM"; 7173 case ICE_AQ_RC_EBUSY: 7174 return "ICE_AQ_RC_EBUSY"; 7175 case ICE_AQ_RC_EEXIST: 7176 return "ICE_AQ_RC_EEXIST"; 7177 case ICE_AQ_RC_EINVAL: 7178 return "ICE_AQ_RC_EINVAL"; 7179 case ICE_AQ_RC_ENOSPC: 7180 return "ICE_AQ_RC_ENOSPC"; 7181 case ICE_AQ_RC_ENOSYS: 7182 return "ICE_AQ_RC_ENOSYS"; 7183 case ICE_AQ_RC_EMODE: 7184 return "ICE_AQ_RC_EMODE"; 7185 case ICE_AQ_RC_ENOSEC: 7186 return "ICE_AQ_RC_ENOSEC"; 7187 case ICE_AQ_RC_EBADSIG: 7188 return "ICE_AQ_RC_EBADSIG"; 7189 case ICE_AQ_RC_ESVN: 7190 return "ICE_AQ_RC_ESVN"; 7191 case ICE_AQ_RC_EBADMAN: 7192 return "ICE_AQ_RC_EBADMAN"; 7193 case ICE_AQ_RC_EBADBUF: 7194 return "ICE_AQ_RC_EBADBUF"; 7195 } 7196 7197 return "ICE_AQ_RC_UNKNOWN"; 7198 } 7199 7200 /** 7201 * ice_set_rss_lut - Set RSS LUT 7202 * @vsi: Pointer to VSI structure 7203 * @lut: Lookup table 7204 * @lut_size: Lookup table size 7205 * 7206 * Returns 0 on success, negative on failure 7207 */ 
7208 int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size) 7209 { 7210 struct ice_aq_get_set_rss_lut_params params = {}; 7211 struct ice_hw *hw = &vsi->back->hw; 7212 int status; 7213 7214 if (!lut) 7215 return -EINVAL; 7216 7217 params.vsi_handle = vsi->idx; 7218 params.lut_size = lut_size; 7219 params.lut_type = vsi->rss_lut_type; 7220 params.lut = lut; 7221 7222 status = ice_aq_set_rss_lut(hw, ¶ms); 7223 if (status) 7224 dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS lut, err %d aq_err %s\n", 7225 status, ice_aq_str(hw->adminq.sq_last_status)); 7226 7227 return status; 7228 } 7229 7230 /** 7231 * ice_set_rss_key - Set RSS key 7232 * @vsi: Pointer to the VSI structure 7233 * @seed: RSS hash seed 7234 * 7235 * Returns 0 on success, negative on failure 7236 */ 7237 int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed) 7238 { 7239 struct ice_hw *hw = &vsi->back->hw; 7240 int status; 7241 7242 if (!seed) 7243 return -EINVAL; 7244 7245 status = ice_aq_set_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed); 7246 if (status) 7247 dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS key, err %d aq_err %s\n", 7248 status, ice_aq_str(hw->adminq.sq_last_status)); 7249 7250 return status; 7251 } 7252 7253 /** 7254 * ice_get_rss_lut - Get RSS LUT 7255 * @vsi: Pointer to VSI structure 7256 * @lut: Buffer to store the lookup table entries 7257 * @lut_size: Size of buffer to store the lookup table entries 7258 * 7259 * Returns 0 on success, negative on failure 7260 */ 7261 int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size) 7262 { 7263 struct ice_aq_get_set_rss_lut_params params = {}; 7264 struct ice_hw *hw = &vsi->back->hw; 7265 int status; 7266 7267 if (!lut) 7268 return -EINVAL; 7269 7270 params.vsi_handle = vsi->idx; 7271 params.lut_size = lut_size; 7272 params.lut_type = vsi->rss_lut_type; 7273 params.lut = lut; 7274 7275 status = ice_aq_get_rss_lut(hw, ¶ms); 7276 if (status) 7277 dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS lut, err %d aq_err %s\n", 7278 status, ice_aq_str(hw->adminq.sq_last_status)); 7279 7280 return status; 7281 } 7282 7283 /** 7284 * ice_get_rss_key - Get RSS key 7285 * @vsi: Pointer to VSI structure 7286 * @seed: Buffer to store the key in 7287 * 7288 * Returns 0 on success, negative on failure 7289 */ 7290 int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed) 7291 { 7292 struct ice_hw *hw = &vsi->back->hw; 7293 int status; 7294 7295 if (!seed) 7296 return -EINVAL; 7297 7298 status = ice_aq_get_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed); 7299 if (status) 7300 dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS key, err %d aq_err %s\n", 7301 status, ice_aq_str(hw->adminq.sq_last_status)); 7302 7303 return status; 7304 } 7305 7306 /** 7307 * ice_bridge_getlink - Get the hardware bridge mode 7308 * @skb: skb buff 7309 * @pid: process ID 7310 * @seq: RTNL message seq 7311 * @dev: the netdev being configured 7312 * @filter_mask: filter mask passed in 7313 * @nlflags: netlink flags passed in 7314 * 7315 * Return the bridge mode (VEB/VEPA) 7316 */ 7317 static int 7318 ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, 7319 struct net_device *dev, u32 filter_mask, int nlflags) 7320 { 7321 struct ice_netdev_priv *np = netdev_priv(dev); 7322 struct ice_vsi *vsi = np->vsi; 7323 struct ice_pf *pf = vsi->back; 7324 u16 bmode; 7325 7326 bmode = pf->first_sw->bridge_mode; 7327 7328 return ndo_dflt_bridge_getlink(skb, pid, seq, dev, bmode, 0, 0, nlflags, 7329 filter_mask, NULL); 7330 } 7331 7332 /** 7333 * ice_vsi_update_bridge_mode - 
Update VSI for switching bridge mode (VEB/VEPA)
 * @vsi: Pointer to VSI structure
 * @bmode: Hardware bridge mode (VEB/VEPA)
 *
 * Returns 0 on success, negative on failure
 */
static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
{
	struct ice_aqc_vsi_props *vsi_props;
	struct ice_hw *hw = &vsi->back->hw;
	struct ice_vsi_ctx *ctxt;
	int ret;

	vsi_props = &vsi->info;

	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (!ctxt)
		return -ENOMEM;

	ctxt->info = vsi->info;

	if (bmode == BRIDGE_MODE_VEB)
		/* change from VEPA to VEB mode */
		ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
	else
		/* change from VEB to VEPA mode */
		ctxt->info.sw_flags &= ~ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
	ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);

	ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
	if (ret) {
		dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %d aq_err %s\n",
			bmode, ret, ice_aq_str(hw->adminq.sq_last_status));
		goto out;
	}
	/* Update sw flags for bookkeeping */
	vsi_props->sw_flags = ctxt->info.sw_flags;

out:
	kfree(ctxt);
	return ret;
}

/**
 * ice_bridge_setlink - Set the hardware bridge mode
 * @dev: the netdev being configured
 * @nlh: RTNL message
 * @flags: bridge setlink flags
 * @extack: netlink extended ack
 *
 * Sets the bridge mode (VEB/VEPA) of the switch to which the netdev (VSI) is
 * hooked up. Iterates through the PF VSI list and sets the loopback mode (if
 * not already set) for all VSIs connected to this switch, and also updates the
 * unicast switch filter rules for the corresponding switch of the netdev.
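 *
 * This NDO is typically reached via an RTM_SETLINK request carrying an
 * IFLA_BRIDGE_MODE attribute, e.g. (assuming the iproute2 bridge tool):
 *	bridge link set dev <ifname> hwmode vepa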
7387 */ 7388 static int 7389 ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, 7390 u16 __always_unused flags, 7391 struct netlink_ext_ack __always_unused *extack) 7392 { 7393 struct ice_netdev_priv *np = netdev_priv(dev); 7394 struct ice_pf *pf = np->vsi->back; 7395 struct nlattr *attr, *br_spec; 7396 struct ice_hw *hw = &pf->hw; 7397 struct ice_sw *pf_sw; 7398 int rem, v, err = 0; 7399 7400 pf_sw = pf->first_sw; 7401 /* find the attribute in the netlink message */ 7402 br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); 7403 7404 nla_for_each_nested(attr, br_spec, rem) { 7405 __u16 mode; 7406 7407 if (nla_type(attr) != IFLA_BRIDGE_MODE) 7408 continue; 7409 mode = nla_get_u16(attr); 7410 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB) 7411 return -EINVAL; 7412 /* Continue if bridge mode is not being flipped */ 7413 if (mode == pf_sw->bridge_mode) 7414 continue; 7415 /* Iterates through the PF VSI list and update the loopback 7416 * mode of the VSI 7417 */ 7418 ice_for_each_vsi(pf, v) { 7419 if (!pf->vsi[v]) 7420 continue; 7421 err = ice_vsi_update_bridge_mode(pf->vsi[v], mode); 7422 if (err) 7423 return err; 7424 } 7425 7426 hw->evb_veb = (mode == BRIDGE_MODE_VEB); 7427 /* Update the unicast switch filter rules for the corresponding 7428 * switch of the netdev 7429 */ 7430 err = ice_update_sw_rule_bridge_mode(hw); 7431 if (err) { 7432 netdev_err(dev, "switch rule update failed, mode = %d err %d aq_err %s\n", 7433 mode, err, 7434 ice_aq_str(hw->adminq.sq_last_status)); 7435 /* revert hw->evb_veb */ 7436 hw->evb_veb = (pf_sw->bridge_mode == BRIDGE_MODE_VEB); 7437 return err; 7438 } 7439 7440 pf_sw->bridge_mode = mode; 7441 } 7442 7443 return 0; 7444 } 7445 7446 /** 7447 * ice_tx_timeout - Respond to a Tx Hang 7448 * @netdev: network interface device structure 7449 * @txqueue: Tx queue 7450 */ 7451 static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) 7452 { 7453 struct ice_netdev_priv *np = netdev_priv(netdev); 7454 struct ice_tx_ring *tx_ring = NULL; 7455 struct ice_vsi *vsi = np->vsi; 7456 struct ice_pf *pf = vsi->back; 7457 u32 i; 7458 7459 pf->tx_timeout_count++; 7460 7461 /* Check if PFC is enabled for the TC to which the queue belongs 7462 * to. If yes then Tx timeout is not caused by a hung queue, no 7463 * need to reset and rebuild 7464 */ 7465 if (ice_is_pfc_causing_hung_q(pf, txqueue)) { 7466 dev_info(ice_pf_to_dev(pf), "Fake Tx hang detected on queue %u, timeout caused by PFC storm\n", 7467 txqueue); 7468 return; 7469 } 7470 7471 /* now that we have an index, find the tx_ring struct */ 7472 ice_for_each_txq(vsi, i) 7473 if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) 7474 if (txqueue == vsi->tx_rings[i]->q_index) { 7475 tx_ring = vsi->tx_rings[i]; 7476 break; 7477 } 7478 7479 /* Reset recovery level if enough time has elapsed after last timeout. 7480 * Also ensure no new reset action happens before next timeout period. 
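	 * The recovery level is bumped after every attempt, so repeated hangs
	 * escalate from a PF reset (level 1) to a core reset (2) and finally
	 * a global reset (3) before the device is declared unrecoverable.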
7481 */ 7482 if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ * 20))) 7483 pf->tx_timeout_recovery_level = 1; 7484 else if (time_before(jiffies, (pf->tx_timeout_last_recovery + 7485 netdev->watchdog_timeo))) 7486 return; 7487 7488 if (tx_ring) { 7489 struct ice_hw *hw = &pf->hw; 7490 u32 head, val = 0; 7491 7492 head = (rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue])) & 7493 QTX_COMM_HEAD_HEAD_M) >> QTX_COMM_HEAD_HEAD_S; 7494 /* Read interrupt register */ 7495 val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx)); 7496 7497 netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %u, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n", 7498 vsi->vsi_num, txqueue, tx_ring->next_to_clean, 7499 head, tx_ring->next_to_use, val); 7500 } 7501 7502 pf->tx_timeout_last_recovery = jiffies; 7503 netdev_info(netdev, "tx_timeout recovery level %d, txqueue %u\n", 7504 pf->tx_timeout_recovery_level, txqueue); 7505 7506 switch (pf->tx_timeout_recovery_level) { 7507 case 1: 7508 set_bit(ICE_PFR_REQ, pf->state); 7509 break; 7510 case 2: 7511 set_bit(ICE_CORER_REQ, pf->state); 7512 break; 7513 case 3: 7514 set_bit(ICE_GLOBR_REQ, pf->state); 7515 break; 7516 default: 7517 netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n"); 7518 set_bit(ICE_DOWN, pf->state); 7519 set_bit(ICE_VSI_NEEDS_RESTART, vsi->state); 7520 set_bit(ICE_SERVICE_DIS, pf->state); 7521 break; 7522 } 7523 7524 ice_service_task_schedule(pf); 7525 pf->tx_timeout_recovery_level++; 7526 } 7527 7528 /** 7529 * ice_setup_tc_cls_flower - flower classifier offloads 7530 * @np: net device to configure 7531 * @filter_dev: device on which filter is added 7532 * @cls_flower: offload data 7533 */ 7534 static int 7535 ice_setup_tc_cls_flower(struct ice_netdev_priv *np, 7536 struct net_device *filter_dev, 7537 struct flow_cls_offload *cls_flower) 7538 { 7539 struct ice_vsi *vsi = np->vsi; 7540 7541 if (cls_flower->common.chain_index) 7542 return -EOPNOTSUPP; 7543 7544 switch (cls_flower->command) { 7545 case FLOW_CLS_REPLACE: 7546 return ice_add_cls_flower(filter_dev, vsi, cls_flower); 7547 case FLOW_CLS_DESTROY: 7548 return ice_del_cls_flower(vsi, cls_flower); 7549 default: 7550 return -EINVAL; 7551 } 7552 } 7553 7554 /** 7555 * ice_setup_tc_block_cb - callback handler registered for TC block 7556 * @type: TC SETUP type 7557 * @type_data: TC flower offload data that contains user input 7558 * @cb_priv: netdev private data 7559 */ 7560 static int 7561 ice_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) 7562 { 7563 struct ice_netdev_priv *np = cb_priv; 7564 7565 switch (type) { 7566 case TC_SETUP_CLSFLOWER: 7567 return ice_setup_tc_cls_flower(np, np->vsi->netdev, 7568 type_data); 7569 default: 7570 return -EOPNOTSUPP; 7571 } 7572 } 7573 7574 /** 7575 * ice_validate_mqprio_qopt - Validate TCF input parameters 7576 * @vsi: Pointer to VSI 7577 * @mqprio_qopt: input parameters for mqprio queue configuration 7578 * 7579 * This function validates MQPRIO params, such as qcount (power of 2 wherever 7580 * needed), and make sure user doesn't specify qcount and BW rate limit 7581 * for TCs, which are more than "num_tc" 7582 */ 7583 static int 7584 ice_validate_mqprio_qopt(struct ice_vsi *vsi, 7585 struct tc_mqprio_qopt_offload *mqprio_qopt) 7586 { 7587 u64 sum_max_rate = 0, sum_min_rate = 0; 7588 int non_power_of_2_qcount = 0; 7589 struct ice_pf *pf = vsi->back; 7590 int max_rss_q_cnt = 0; 7591 struct device *dev; 7592 int i, speed; 7593 u8 num_tc; 7594 7595 if (vsi->type != ICE_VSI_PF) 7596 return 
-EINVAL; 7597 7598 if (mqprio_qopt->qopt.offset[0] != 0 || 7599 mqprio_qopt->qopt.num_tc < 1 || 7600 mqprio_qopt->qopt.num_tc > ICE_CHNL_MAX_TC) 7601 return -EINVAL; 7602 7603 dev = ice_pf_to_dev(pf); 7604 vsi->ch_rss_size = 0; 7605 num_tc = mqprio_qopt->qopt.num_tc; 7606 7607 for (i = 0; num_tc; i++) { 7608 int qcount = mqprio_qopt->qopt.count[i]; 7609 u64 max_rate, min_rate, rem; 7610 7611 if (!qcount) 7612 return -EINVAL; 7613 7614 if (is_power_of_2(qcount)) { 7615 if (non_power_of_2_qcount && 7616 qcount > non_power_of_2_qcount) { 7617 dev_err(dev, "qcount[%d] cannot be greater than non power of 2 qcount[%d]\n", 7618 qcount, non_power_of_2_qcount); 7619 return -EINVAL; 7620 } 7621 if (qcount > max_rss_q_cnt) 7622 max_rss_q_cnt = qcount; 7623 } else { 7624 if (non_power_of_2_qcount && 7625 qcount != non_power_of_2_qcount) { 7626 dev_err(dev, "Only one non power of 2 qcount allowed[%d,%d]\n", 7627 qcount, non_power_of_2_qcount); 7628 return -EINVAL; 7629 } 7630 if (qcount < max_rss_q_cnt) { 7631 dev_err(dev, "non power of 2 qcount[%d] cannot be less than other qcount[%d]\n", 7632 qcount, max_rss_q_cnt); 7633 return -EINVAL; 7634 } 7635 max_rss_q_cnt = qcount; 7636 non_power_of_2_qcount = qcount; 7637 } 7638 7639 /* TC command takes input in K/N/Gbps or K/M/Gbit etc but 7640 * converts the bandwidth rate limit into Bytes/s when 7641 * passing it down to the driver. So convert input bandwidth 7642 * from Bytes/s to Kbps 7643 */ 7644 max_rate = mqprio_qopt->max_rate[i]; 7645 max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR); 7646 sum_max_rate += max_rate; 7647 7648 /* min_rate is minimum guaranteed rate and it can't be zero */ 7649 min_rate = mqprio_qopt->min_rate[i]; 7650 min_rate = div_u64(min_rate, ICE_BW_KBPS_DIVISOR); 7651 sum_min_rate += min_rate; 7652 7653 if (min_rate && min_rate < ICE_MIN_BW_LIMIT) { 7654 dev_err(dev, "TC%d: min_rate(%llu Kbps) < %u Kbps\n", i, 7655 min_rate, ICE_MIN_BW_LIMIT); 7656 return -EINVAL; 7657 } 7658 7659 iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem); 7660 if (rem) { 7661 dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps", 7662 i, ICE_MIN_BW_LIMIT); 7663 return -EINVAL; 7664 } 7665 7666 iter_div_u64_rem(max_rate, ICE_MIN_BW_LIMIT, &rem); 7667 if (rem) { 7668 dev_err(dev, "TC%d: Max Rate not multiple of %u Kbps", 7669 i, ICE_MIN_BW_LIMIT); 7670 return -EINVAL; 7671 } 7672 7673 /* min_rate can't be more than max_rate, except when max_rate 7674 * is zero (implies max_rate sought is max line rate). In such 7675 * a case min_rate can be more than max. 
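 * For example, max_rate = 0 together with a non-zero min_rate is
 * accepted; only the minimum guarantee is enforced in that case.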
7676 */ 7677 if (max_rate && min_rate > max_rate) { 7678 dev_err(dev, "min_rate %llu Kbps can't be more than max_rate %llu Kbps\n", 7679 min_rate, max_rate); 7680 return -EINVAL; 7681 } 7682 7683 if (i >= mqprio_qopt->qopt.num_tc - 1) 7684 break; 7685 if (mqprio_qopt->qopt.offset[i + 1] != 7686 (mqprio_qopt->qopt.offset[i] + qcount)) 7687 return -EINVAL; 7688 } 7689 if (vsi->num_rxq < 7690 (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) 7691 return -EINVAL; 7692 if (vsi->num_txq < 7693 (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) 7694 return -EINVAL; 7695 7696 speed = ice_get_link_speed_kbps(vsi); 7697 if (sum_max_rate && sum_max_rate > (u64)speed) { 7698 dev_err(dev, "Invalid max Tx rate(%llu) Kbps > speed(%u) Kbps specified\n", 7699 sum_max_rate, speed); 7700 return -EINVAL; 7701 } 7702 if (sum_min_rate && sum_min_rate > (u64)speed) { 7703 dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n", 7704 sum_min_rate, speed); 7705 return -EINVAL; 7706 } 7707 7708 /* make sure vsi->ch_rss_size is set correctly based on TC's qcount */ 7709 vsi->ch_rss_size = max_rss_q_cnt; 7710 7711 return 0; 7712 } 7713 7714 /** 7715 * ice_add_vsi_to_fdir - add a VSI to the flow director group for PF 7716 * @pf: ptr to PF device 7717 * @vsi: ptr to VSI 7718 */ 7719 static int ice_add_vsi_to_fdir(struct ice_pf *pf, struct ice_vsi *vsi) 7720 { 7721 struct device *dev = ice_pf_to_dev(pf); 7722 bool added = false; 7723 struct ice_hw *hw; 7724 int flow; 7725 7726 if (!(vsi->num_gfltr || vsi->num_bfltr)) 7727 return -EINVAL; 7728 7729 hw = &pf->hw; 7730 for (flow = 0; flow < ICE_FLTR_PTYPE_MAX; flow++) { 7731 struct ice_fd_hw_prof *prof; 7732 int tun, status; 7733 u64 entry_h; 7734 7735 if (!(hw->fdir_prof && hw->fdir_prof[flow] && 7736 hw->fdir_prof[flow]->cnt)) 7737 continue; 7738 7739 for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) { 7740 enum ice_flow_priority prio; 7741 u64 prof_id; 7742 7743 /* add this VSI to FDir profile for this flow */ 7744 prio = ICE_FLOW_PRIO_NORMAL; 7745 prof = hw->fdir_prof[flow]; 7746 prof_id = flow + tun * ICE_FLTR_PTYPE_MAX; 7747 status = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, 7748 prof->vsi_h[0], vsi->idx, 7749 prio, prof->fdir_seg[tun], 7750 &entry_h); 7751 if (status) { 7752 dev_err(dev, "channel VSI idx %d, not able to add to group %d\n", 7753 vsi->idx, flow); 7754 continue; 7755 } 7756 7757 prof->entry_h[prof->cnt][tun] = entry_h; 7758 } 7759 7760 /* store VSI for filter replay and delete */ 7761 prof->vsi_h[prof->cnt] = vsi->idx; 7762 prof->cnt++; 7763 7764 added = true; 7765 dev_dbg(dev, "VSI idx %d added to fdir group %d\n", vsi->idx, 7766 flow); 7767 } 7768 7769 if (!added) 7770 dev_dbg(dev, "VSI idx %d not added to fdir groups\n", vsi->idx); 7771 7772 return 0; 7773 } 7774 7775 /** 7776 * ice_add_channel - add a channel by adding VSI 7777 * @pf: ptr to PF device 7778 * @sw_id: underlying HW switching element ID 7779 * @ch: ptr to channel structure 7780 * 7781 * Add a channel (VSI) using add_vsi and queue_map 7782 */ 7783 static int ice_add_channel(struct ice_pf *pf, u16 sw_id, struct ice_channel *ch) 7784 { 7785 struct device *dev = ice_pf_to_dev(pf); 7786 struct ice_vsi *vsi; 7787 7788 if (ch->type != ICE_VSI_CHNL) { 7789 dev_err(dev, "add new VSI failed, ch->type %d\n", ch->type); 7790 return -EINVAL; 7791 } 7792 7793 vsi = ice_chnl_vsi_setup(pf, pf->hw.port_info, ch); 7794 if (!vsi || vsi->type != ICE_VSI_CHNL) { 7795 dev_err(dev, "create chnl VSI failure\n"); 7796 return -EINVAL; 7797 } 7798 7799 
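	/* make the new channel VSI known to the flow director so that
	 * side-band (ntuple) filters can be added to and replayed on it
	 */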
ice_add_vsi_to_fdir(pf, vsi); 7800 7801 ch->sw_id = sw_id; 7802 ch->vsi_num = vsi->vsi_num; 7803 ch->info.mapping_flags = vsi->info.mapping_flags; 7804 ch->ch_vsi = vsi; 7805 /* set the back pointer of channel for newly created VSI */ 7806 vsi->ch = ch; 7807 7808 memcpy(&ch->info.q_mapping, &vsi->info.q_mapping, 7809 sizeof(vsi->info.q_mapping)); 7810 memcpy(&ch->info.tc_mapping, vsi->info.tc_mapping, 7811 sizeof(vsi->info.tc_mapping)); 7812 7813 return 0; 7814 } 7815 7816 /** 7817 * ice_chnl_cfg_res 7818 * @vsi: the VSI being setup 7819 * @ch: ptr to channel structure 7820 * 7821 * Configure channel specific resources such as rings, vector. 7822 */ 7823 static void ice_chnl_cfg_res(struct ice_vsi *vsi, struct ice_channel *ch) 7824 { 7825 int i; 7826 7827 for (i = 0; i < ch->num_txq; i++) { 7828 struct ice_q_vector *tx_q_vector, *rx_q_vector; 7829 struct ice_ring_container *rc; 7830 struct ice_tx_ring *tx_ring; 7831 struct ice_rx_ring *rx_ring; 7832 7833 tx_ring = vsi->tx_rings[ch->base_q + i]; 7834 rx_ring = vsi->rx_rings[ch->base_q + i]; 7835 if (!tx_ring || !rx_ring) 7836 continue; 7837 7838 /* setup ring being channel enabled */ 7839 tx_ring->ch = ch; 7840 rx_ring->ch = ch; 7841 7842 /* following code block sets up vector specific attributes */ 7843 tx_q_vector = tx_ring->q_vector; 7844 rx_q_vector = rx_ring->q_vector; 7845 if (!tx_q_vector && !rx_q_vector) 7846 continue; 7847 7848 if (tx_q_vector) { 7849 tx_q_vector->ch = ch; 7850 /* setup Tx and Rx ITR setting if DIM is off */ 7851 rc = &tx_q_vector->tx; 7852 if (!ITR_IS_DYNAMIC(rc)) 7853 ice_write_itr(rc, rc->itr_setting); 7854 } 7855 if (rx_q_vector) { 7856 rx_q_vector->ch = ch; 7857 /* setup Tx and Rx ITR setting if DIM is off */ 7858 rc = &rx_q_vector->rx; 7859 if (!ITR_IS_DYNAMIC(rc)) 7860 ice_write_itr(rc, rc->itr_setting); 7861 } 7862 } 7863 7864 /* it is safe to assume that, if channel has non-zero num_t[r]xq, then 7865 * GLINT_ITR register would have been written to perform in-context 7866 * update, hence perform flush 7867 */ 7868 if (ch->num_txq || ch->num_rxq) 7869 ice_flush(&vsi->back->hw); 7870 } 7871 7872 /** 7873 * ice_cfg_chnl_all_res - configure channel resources 7874 * @vsi: ptr to main_vsi 7875 * @ch: ptr to channel structure 7876 * 7877 * This function configures channel specific resources such as flow-director 7878 * counter index, and other resources such as queues, vectors, ITR settings 7879 */ 7880 static void 7881 ice_cfg_chnl_all_res(struct ice_vsi *vsi, struct ice_channel *ch) 7882 { 7883 /* configure channel (aka ADQ) resources such as queues, vectors, 7884 * ITR settings for channel specific vectors and anything else 7885 */ 7886 ice_chnl_cfg_res(vsi, ch); 7887 } 7888 7889 /** 7890 * ice_setup_hw_channel - setup new channel 7891 * @pf: ptr to PF device 7892 * @vsi: the VSI being setup 7893 * @ch: ptr to channel structure 7894 * @sw_id: underlying HW switching element ID 7895 * @type: type of channel to be created (VMDq2/VF) 7896 * 7897 * Setup new channel (VSI) based on specified type (VMDq2/VF) 7898 * and configures Tx rings accordingly 7899 */ 7900 static int 7901 ice_setup_hw_channel(struct ice_pf *pf, struct ice_vsi *vsi, 7902 struct ice_channel *ch, u16 sw_id, u8 type) 7903 { 7904 struct device *dev = ice_pf_to_dev(pf); 7905 int ret; 7906 7907 ch->base_q = vsi->next_base_q; 7908 ch->type = type; 7909 7910 ret = ice_add_channel(pf, sw_id, ch); 7911 if (ret) { 7912 dev_err(dev, "failed to add_channel using sw_id %u\n", sw_id); 7913 return ret; 7914 } 7915 7916 /* configure/setup ADQ specific resources */
7917 ice_cfg_chnl_all_res(vsi, ch); 7918 7919 /* make sure to update the next_base_q so that subsequent channel's 7920 * (aka ADQ) VSI queue map is correct 7921 */ 7922 vsi->next_base_q = vsi->next_base_q + ch->num_rxq; 7923 dev_dbg(dev, "added channel: vsi_num %u, num_rxq %u\n", ch->vsi_num, 7924 ch->num_rxq); 7925 7926 return 0; 7927 } 7928 7929 /** 7930 * ice_setup_channel - setup new channel using uplink element 7931 * @pf: ptr to PF device 7932 * @vsi: the VSI being setup 7933 * @ch: ptr to channel structure 7934 * 7935 * Setup new channel (VSI) based on specified type (VMDq2/VF) 7936 * and uplink switching element 7937 */ 7938 static bool 7939 ice_setup_channel(struct ice_pf *pf, struct ice_vsi *vsi, 7940 struct ice_channel *ch) 7941 { 7942 struct device *dev = ice_pf_to_dev(pf); 7943 u16 sw_id; 7944 int ret; 7945 7946 if (vsi->type != ICE_VSI_PF) { 7947 dev_err(dev, "unsupported parent VSI type(%d)\n", vsi->type); 7948 return false; 7949 } 7950 7951 sw_id = pf->first_sw->sw_id; 7952 7953 /* create channel (VSI) */ 7954 ret = ice_setup_hw_channel(pf, vsi, ch, sw_id, ICE_VSI_CHNL); 7955 if (ret) { 7956 dev_err(dev, "failed to setup hw_channel\n"); 7957 return false; 7958 } 7959 dev_dbg(dev, "successfully created channel()\n"); 7960 7961 return ch->ch_vsi ? true : false; 7962 } 7963 7964 /** 7965 * ice_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate 7966 * @vsi: VSI to be configured 7967 * @max_tx_rate: max Tx rate in Kbps to be configured as maximum BW limit 7968 * @min_tx_rate: min Tx rate in Kbps to be configured as minimum BW limit 7969 */ 7970 static int 7971 ice_set_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate, u64 min_tx_rate) 7972 { 7973 int err; 7974 7975 err = ice_set_min_bw_limit(vsi, min_tx_rate); 7976 if (err) 7977 return err; 7978 7979 return ice_set_max_bw_limit(vsi, max_tx_rate); 7980 } 7981 7982 /** 7983 * ice_create_q_channel - function to create channel 7984 * @vsi: VSI to be configured 7985 * @ch: ptr to channel (it contains channel specific params) 7986 * 7987 * This function creates channel (VSI) using num_queues specified by user, 7988 * reconfigs RSS if needed. 
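 *
 * Returns 0 on success, negative value on failure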
7989 */ 7990 static int ice_create_q_channel(struct ice_vsi *vsi, struct ice_channel *ch) 7991 { 7992 struct ice_pf *pf = vsi->back; 7993 struct device *dev; 7994 7995 if (!ch) 7996 return -EINVAL; 7997 7998 dev = ice_pf_to_dev(pf); 7999 if (!ch->num_txq || !ch->num_rxq) { 8000 dev_err(dev, "Invalid num_queues requested: %d\n", ch->num_rxq); 8001 return -EINVAL; 8002 } 8003 8004 if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_txq) { 8005 dev_err(dev, "cnt_q_avail (%u) less than num_queues %d\n", 8006 vsi->cnt_q_avail, ch->num_txq); 8007 return -EINVAL; 8008 } 8009 8010 if (!ice_setup_channel(pf, vsi, ch)) { 8011 dev_info(dev, "Failed to setup channel\n"); 8012 return -EINVAL; 8013 } 8014 /* configure BW rate limit */ 8015 if (ch->ch_vsi && (ch->max_tx_rate || ch->min_tx_rate)) { 8016 int ret; 8017 8018 ret = ice_set_bw_limit(ch->ch_vsi, ch->max_tx_rate, 8019 ch->min_tx_rate); 8020 if (ret) 8021 dev_err(dev, "failed to set Tx rate of %llu Kbps for VSI(%u)\n", 8022 ch->max_tx_rate, ch->ch_vsi->vsi_num); 8023 else 8024 dev_dbg(dev, "set Tx rate of %llu Kbps for VSI(%u)\n", 8025 ch->max_tx_rate, ch->ch_vsi->vsi_num); 8026 } 8027 8028 vsi->cnt_q_avail -= ch->num_txq; 8029 8030 return 0; 8031 } 8032 8033 /** 8034 * ice_rem_all_chnl_fltrs - removes all channel filters 8035 * @pf: ptr to PF, TC-flower based filter are tracked at PF level 8036 * 8037 * Remove all advanced switch filters only if they are channel specific 8038 * tc-flower based filter 8039 */ 8040 static void ice_rem_all_chnl_fltrs(struct ice_pf *pf) 8041 { 8042 struct ice_tc_flower_fltr *fltr; 8043 struct hlist_node *node; 8044 8045 /* to remove all channel filters, iterate an ordered list of filters */ 8046 hlist_for_each_entry_safe(fltr, node, 8047 &pf->tc_flower_fltr_list, 8048 tc_flower_node) { 8049 struct ice_rule_query_data rule; 8050 int status; 8051 8052 /* for now process only channel specific filters */ 8053 if (!ice_is_chnl_fltr(fltr)) 8054 continue; 8055 8056 rule.rid = fltr->rid; 8057 rule.rule_id = fltr->rule_id; 8058 rule.vsi_handle = fltr->dest_id; 8059 status = ice_rem_adv_rule_by_id(&pf->hw, &rule); 8060 if (status) { 8061 if (status == -ENOENT) 8062 dev_dbg(ice_pf_to_dev(pf), "TC flower filter (rule_id %u) does not exist\n", 8063 rule.rule_id); 8064 else 8065 dev_err(ice_pf_to_dev(pf), "failed to delete TC flower filter, status %d\n", 8066 status); 8067 } else if (fltr->dest_vsi) { 8068 /* update advanced switch filter count */ 8069 if (fltr->dest_vsi->type == ICE_VSI_CHNL) { 8070 u32 flags = fltr->flags; 8071 8072 fltr->dest_vsi->num_chnl_fltr--; 8073 if (flags & (ICE_TC_FLWR_FIELD_DST_MAC | 8074 ICE_TC_FLWR_FIELD_ENC_DST_MAC)) 8075 pf->num_dmac_chnl_fltrs--; 8076 } 8077 } 8078 8079 hlist_del(&fltr->tc_flower_node); 8080 kfree(fltr); 8081 } 8082 } 8083 8084 /** 8085 * ice_remove_q_channels - Remove queue channels for the TCs 8086 * @vsi: VSI to be configured 8087 * @rem_fltr: delete advanced switch filter or not 8088 * 8089 * Remove queue channels for the TCs 8090 */ 8091 static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_fltr) 8092 { 8093 struct ice_channel *ch, *ch_tmp; 8094 struct ice_pf *pf = vsi->back; 8095 int i; 8096 8097 /* remove all tc-flower based filter if they are channel filters only */ 8098 if (rem_fltr) 8099 ice_rem_all_chnl_fltrs(pf); 8100 8101 /* remove ntuple filters since queue configuration is being changed */ 8102 if (vsi->netdev->features & NETIF_F_NTUPLE) { 8103 struct ice_hw *hw = &pf->hw; 8104 8105 mutex_lock(&hw->fdir_fltr_lock); 8106 ice_fdir_del_all_fltrs(vsi); 8107 
mutex_unlock(&hw->fdir_fltr_lock); 8108 } 8109 8110 /* perform cleanup for channels if they exist */ 8111 list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { 8112 struct ice_vsi *ch_vsi; 8113 8114 list_del(&ch->list); 8115 ch_vsi = ch->ch_vsi; 8116 if (!ch_vsi) { 8117 kfree(ch); 8118 continue; 8119 } 8120 8121 /* Reset queue contexts */ 8122 for (i = 0; i < ch->num_rxq; i++) { 8123 struct ice_tx_ring *tx_ring; 8124 struct ice_rx_ring *rx_ring; 8125 8126 tx_ring = vsi->tx_rings[ch->base_q + i]; 8127 rx_ring = vsi->rx_rings[ch->base_q + i]; 8128 if (tx_ring) { 8129 tx_ring->ch = NULL; 8130 if (tx_ring->q_vector) 8131 tx_ring->q_vector->ch = NULL; 8132 } 8133 if (rx_ring) { 8134 rx_ring->ch = NULL; 8135 if (rx_ring->q_vector) 8136 rx_ring->q_vector->ch = NULL; 8137 } 8138 } 8139 8140 /* Release FD resources for the channel VSI */ 8141 ice_fdir_rem_adq_chnl(&pf->hw, ch->ch_vsi->idx); 8142 8143 /* clear the VSI from scheduler tree */ 8144 ice_rm_vsi_lan_cfg(ch->ch_vsi->port_info, ch->ch_vsi->idx); 8145 8146 /* Delete VSI from FW */ 8147 ice_vsi_delete(ch->ch_vsi); 8148 8149 /* Delete VSI from PF and HW VSI arrays */ 8150 ice_vsi_clear(ch->ch_vsi); 8151 8152 /* free the channel */ 8153 kfree(ch); 8154 } 8155 8156 /* clear the channel VSI map which is stored in main VSI */ 8157 ice_for_each_chnl_tc(i) 8158 vsi->tc_map_vsi[i] = NULL; 8159 8160 /* reset main VSI's all TC information */ 8161 vsi->all_enatc = 0; 8162 vsi->all_numtc = 0; 8163 } 8164 8165 /** 8166 * ice_rebuild_channels - rebuild channel 8167 * @pf: ptr to PF 8168 * 8169 * Recreate channel VSIs and replay filters 8170 */ 8171 static int ice_rebuild_channels(struct ice_pf *pf) 8172 { 8173 struct device *dev = ice_pf_to_dev(pf); 8174 struct ice_vsi *main_vsi; 8175 bool rem_adv_fltr = true; 8176 struct ice_channel *ch; 8177 struct ice_vsi *vsi; 8178 int tc_idx = 1; 8179 int i, err; 8180 8181 main_vsi = ice_get_main_vsi(pf); 8182 if (!main_vsi) 8183 return 0; 8184 8185 if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) || 8186 main_vsi->old_numtc == 1) 8187 return 0; /* nothing to be done */ 8188 8189 /* reconfigure main VSI based on old value of TC and cached values 8190 * for MQPRIO opts 8191 */ 8192 err = ice_vsi_cfg_tc(main_vsi, main_vsi->old_ena_tc); 8193 if (err) { 8194 dev_err(dev, "failed configuring TC(ena_tc:0x%02x) for HW VSI=%u\n", 8195 main_vsi->old_ena_tc, main_vsi->vsi_num); 8196 return err; 8197 } 8198 8199 /* rebuild ADQ VSIs */ 8200 ice_for_each_vsi(pf, i) { 8201 enum ice_vsi_type type; 8202 8203 vsi = pf->vsi[i]; 8204 if (!vsi || vsi->type != ICE_VSI_CHNL) 8205 continue; 8206 8207 type = vsi->type; 8208 8209 /* rebuild ADQ VSI */ 8210 err = ice_vsi_rebuild(vsi, true); 8211 if (err) { 8212 dev_err(dev, "VSI (type:%s) at index %d rebuild failed, err %d\n", 8213 ice_vsi_type_str(type), vsi->idx, err); 8214 goto cleanup; 8215 } 8216 8217 /* Re-map HW VSI number, using VSI handle that has been 8218 * previously validated in ice_replay_vsi() call above 8219 */ 8220 vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx); 8221 8222 /* replay filters for the VSI */ 8223 err = ice_replay_vsi(&pf->hw, vsi->idx); 8224 if (err) { 8225 dev_err(dev, "VSI (type:%s) replay failed, err %d, VSI index %d\n", 8226 ice_vsi_type_str(type), err, vsi->idx); 8227 rem_adv_fltr = false; 8228 goto cleanup; 8229 } 8230 dev_info(dev, "VSI (type:%s) at index %d rebuilt successfully\n", 8231 ice_vsi_type_str(type), vsi->idx); 8232 8233 /* store ADQ VSI at correct TC index in main VSI's 8234 * map of TC to VSI 8235 */ 8236 main_vsi->tc_map_vsi[tc_idx++] 
= vsi; 8237 } 8238 8239 /* ADQ VSI(s) has been rebuilt successfully, so setup 8240 * channel for main VSI's Tx and Rx rings 8241 */ 8242 list_for_each_entry(ch, &main_vsi->ch_list, list) { 8243 struct ice_vsi *ch_vsi; 8244 8245 ch_vsi = ch->ch_vsi; 8246 if (!ch_vsi) 8247 continue; 8248 8249 /* reconfig channel resources */ 8250 ice_cfg_chnl_all_res(main_vsi, ch); 8251 8252 /* replay BW rate limit if it is non-zero */ 8253 if (!ch->max_tx_rate && !ch->min_tx_rate) 8254 continue; 8255 8256 err = ice_set_bw_limit(ch_vsi, ch->max_tx_rate, 8257 ch->min_tx_rate); 8258 if (err) 8259 dev_err(dev, "failed (err:%d) to rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n", 8260 err, ch->max_tx_rate, ch->min_tx_rate, 8261 ch_vsi->vsi_num); 8262 else 8263 dev_dbg(dev, "successfully rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n", 8264 ch->max_tx_rate, ch->min_tx_rate, 8265 ch_vsi->vsi_num); 8266 } 8267 8268 /* reconfig RSS for main VSI */ 8269 if (main_vsi->ch_rss_size) 8270 ice_vsi_cfg_rss_lut_key(main_vsi); 8271 8272 return 0; 8273 8274 cleanup: 8275 ice_remove_q_channels(main_vsi, rem_adv_fltr); 8276 return err; 8277 } 8278 8279 /** 8280 * ice_create_q_channels - Add queue channel for the given TCs 8281 * @vsi: VSI to be configured 8282 * 8283 * Configures queue channel mapping to the given TCs 8284 */ 8285 static int ice_create_q_channels(struct ice_vsi *vsi) 8286 { 8287 struct ice_pf *pf = vsi->back; 8288 struct ice_channel *ch; 8289 int ret = 0, i; 8290 8291 ice_for_each_chnl_tc(i) { 8292 if (!(vsi->all_enatc & BIT(i))) 8293 continue; 8294 8295 ch = kzalloc(sizeof(*ch), GFP_KERNEL); 8296 if (!ch) { 8297 ret = -ENOMEM; 8298 goto err_free; 8299 } 8300 INIT_LIST_HEAD(&ch->list); 8301 ch->num_rxq = vsi->mqprio_qopt.qopt.count[i]; 8302 ch->num_txq = vsi->mqprio_qopt.qopt.count[i]; 8303 ch->base_q = vsi->mqprio_qopt.qopt.offset[i]; 8304 ch->max_tx_rate = vsi->mqprio_qopt.max_rate[i]; 8305 ch->min_tx_rate = vsi->mqprio_qopt.min_rate[i]; 8306 8307 /* convert to Kbits/s */ 8308 if (ch->max_tx_rate) 8309 ch->max_tx_rate = div_u64(ch->max_tx_rate, 8310 ICE_BW_KBPS_DIVISOR); 8311 if (ch->min_tx_rate) 8312 ch->min_tx_rate = div_u64(ch->min_tx_rate, 8313 ICE_BW_KBPS_DIVISOR); 8314 8315 ret = ice_create_q_channel(vsi, ch); 8316 if (ret) { 8317 dev_err(ice_pf_to_dev(pf), 8318 "failed creating channel TC:%d\n", i); 8319 kfree(ch); 8320 goto err_free; 8321 } 8322 list_add_tail(&ch->list, &vsi->ch_list); 8323 vsi->tc_map_vsi[i] = ch->ch_vsi; 8324 dev_dbg(ice_pf_to_dev(pf), 8325 "successfully created channel: VSI %pK\n", ch->ch_vsi); 8326 } 8327 return 0; 8328 8329 err_free: 8330 ice_remove_q_channels(vsi, false); 8331 8332 return ret; 8333 } 8334 8335 /** 8336 * ice_setup_tc_mqprio_qdisc - configure multiple traffic classes 8337 * @netdev: net device to configure 8338 * @type_data: TC offload data 8339 */ 8340 static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data) 8341 { 8342 struct tc_mqprio_qopt_offload *mqprio_qopt = type_data; 8343 struct ice_netdev_priv *np = netdev_priv(netdev); 8344 struct ice_vsi *vsi = np->vsi; 8345 struct ice_pf *pf = vsi->back; 8346 u16 mode, ena_tc_qdisc = 0; 8347 int cur_txq, cur_rxq; 8348 u8 hw = 0, num_tcf; 8349 struct device *dev; 8350 int ret, i; 8351 8352 dev = ice_pf_to_dev(pf); 8353 num_tcf = mqprio_qopt->qopt.num_tc; 8354 hw = mqprio_qopt->qopt.hw; 8355 mode = mqprio_qopt->mode; 8356 if (!hw) { 8357 clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags); 8358 vsi->ch_rss_size = 0; 8359 
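		/* hw == 0 means no hardware offload is requested (e.g. the
		 * mqprio qdisc is being deleted); cache the request and jump
		 * ahead to tear down the channel config and restore defaults
		 */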
memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); 8360 goto config_tcf; 8361 } 8362 8363 /* Generate queue region map for number of TCF requested */ 8364 for (i = 0; i < num_tcf; i++) 8365 ena_tc_qdisc |= BIT(i); 8366 8367 switch (mode) { 8368 case TC_MQPRIO_MODE_CHANNEL: 8369 8370 ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt); 8371 if (ret) { 8372 netdev_err(netdev, "failed to validate_mqprio_qopt(), ret %d\n", 8373 ret); 8374 return ret; 8375 } 8376 memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); 8377 set_bit(ICE_FLAG_TC_MQPRIO, pf->flags); 8378 /* don't assume state of hw_tc_offload during driver load 8379 * and set the flag for TC flower filter if hw_tc_offload 8380 * already ON 8381 */ 8382 if (vsi->netdev->features & NETIF_F_HW_TC) 8383 set_bit(ICE_FLAG_CLS_FLOWER, pf->flags); 8384 break; 8385 default: 8386 return -EINVAL; 8387 } 8388 8389 config_tcf: 8390 8391 /* Requesting same TCF configuration as already enabled */ 8392 if (ena_tc_qdisc == vsi->tc_cfg.ena_tc && 8393 mode != TC_MQPRIO_MODE_CHANNEL) 8394 return 0; 8395 8396 /* Pause VSI queues */ 8397 ice_dis_vsi(vsi, true); 8398 8399 if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) 8400 ice_remove_q_channels(vsi, true); 8401 8402 if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) { 8403 vsi->req_txq = min_t(int, ice_get_avail_txq_count(pf), 8404 num_online_cpus()); 8405 vsi->req_rxq = min_t(int, ice_get_avail_rxq_count(pf), 8406 num_online_cpus()); 8407 } else { 8408 /* logic to rebuild VSI, same as ethtool -L */ 8409 u16 offset = 0, qcount_tx = 0, qcount_rx = 0; 8410 8411 for (i = 0; i < num_tcf; i++) { 8412 if (!(ena_tc_qdisc & BIT(i))) 8413 continue; 8414 8415 offset = vsi->mqprio_qopt.qopt.offset[i]; 8416 qcount_rx = vsi->mqprio_qopt.qopt.count[i]; 8417 qcount_tx = vsi->mqprio_qopt.qopt.count[i]; 8418 } 8419 vsi->req_txq = offset + qcount_tx; 8420 vsi->req_rxq = offset + qcount_rx; 8421 8422 /* store away original rss_size info, so that it gets reused 8423 * from ice_vsi_rebuild during tc-qdisc delete stage - to 8424 * determine what should be the rss_size for main VSI 8425 */ 8426 vsi->orig_rss_size = vsi->rss_size; 8427 } 8428 8429 /* save current values of Tx and Rx queues before calling VSI rebuild 8430 * for fallback option 8431 */ 8432 cur_txq = vsi->num_txq; 8433 cur_rxq = vsi->num_rxq; 8434 8435 /* proceed with rebuild main VSI using correct number of queues */ 8436 ret = ice_vsi_rebuild(vsi, false); 8437 if (ret) { 8438 /* fallback to current number of queues */ 8439 dev_info(dev, "Rebuild failed with new queues, try with current number of queues\n"); 8440 vsi->req_txq = cur_txq; 8441 vsi->req_rxq = cur_rxq; 8442 clear_bit(ICE_RESET_FAILED, pf->state); 8443 if (ice_vsi_rebuild(vsi, false)) { 8444 dev_err(dev, "Rebuild of main VSI failed again\n"); 8445 return ret; 8446 } 8447 } 8448 8449 vsi->all_numtc = num_tcf; 8450 vsi->all_enatc = ena_tc_qdisc; 8451 ret = ice_vsi_cfg_tc(vsi, ena_tc_qdisc); 8452 if (ret) { 8453 netdev_err(netdev, "failed configuring TC for VSI id=%d\n", 8454 vsi->vsi_num); 8455 goto exit; 8456 } 8457 8458 if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) { 8459 u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0]; 8460 u64 min_tx_rate = vsi->mqprio_qopt.min_rate[0]; 8461 8462 /* set TC0 rate limit if specified */ 8463 if (max_tx_rate || min_tx_rate) { 8464 /* convert to Kbits/s */ 8465 if (max_tx_rate) 8466 max_tx_rate = div_u64(max_tx_rate, ICE_BW_KBPS_DIVISOR); 8467 if (min_tx_rate) 8468 min_tx_rate = div_u64(min_tx_rate, ICE_BW_KBPS_DIVISOR); 8469 8470 ret =
ice_set_bw_limit(vsi, max_tx_rate, min_tx_rate); 8471 if (!ret) { 8472 dev_dbg(dev, "set Tx rate max %llu min %llu for VSI(%u)\n", 8473 max_tx_rate, min_tx_rate, vsi->vsi_num); 8474 } else { 8475 dev_err(dev, "failed to set Tx rate max %llu min %llu for VSI(%u)\n", 8476 max_tx_rate, min_tx_rate, vsi->vsi_num); 8477 goto exit; 8478 } 8479 } 8480 ret = ice_create_q_channels(vsi); 8481 if (ret) { 8482 netdev_err(netdev, "failed configuring queue channels\n"); 8483 goto exit; 8484 } else { 8485 netdev_dbg(netdev, "successfully configured channels\n"); 8486 } 8487 } 8488 8489 if (vsi->ch_rss_size) 8490 ice_vsi_cfg_rss_lut_key(vsi); 8491 8492 exit: 8493 /* if error, reset the all_numtc and all_enatc */ 8494 if (ret) { 8495 vsi->all_numtc = 0; 8496 vsi->all_enatc = 0; 8497 } 8498 /* resume VSI */ 8499 ice_ena_vsi(vsi, true); 8500 8501 return ret; 8502 } 8503 8504 static LIST_HEAD(ice_block_cb_list); 8505 8506 static int 8507 ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, 8508 void *type_data) 8509 { 8510 struct ice_netdev_priv *np = netdev_priv(netdev); 8511 struct ice_pf *pf = np->vsi->back; 8512 int err; 8513 8514 switch (type) { 8515 case TC_SETUP_BLOCK: 8516 return flow_block_cb_setup_simple(type_data, 8517 &ice_block_cb_list, 8518 ice_setup_tc_block_cb, 8519 np, np, true); 8520 case TC_SETUP_QDISC_MQPRIO: 8521 /* setup traffic classifier for receive side */ 8522 mutex_lock(&pf->tc_mutex); 8523 err = ice_setup_tc_mqprio_qdisc(netdev, type_data); 8524 mutex_unlock(&pf->tc_mutex); 8525 return err; 8526 default: 8527 return -EOPNOTSUPP; 8528 } 8529 return -EOPNOTSUPP; 8530 } 8531 8532 static struct ice_indr_block_priv * 8533 ice_indr_block_priv_lookup(struct ice_netdev_priv *np, 8534 struct net_device *netdev) 8535 { 8536 struct ice_indr_block_priv *cb_priv; 8537 8538 list_for_each_entry(cb_priv, &np->tc_indr_block_priv_list, list) { 8539 if (!cb_priv->netdev) 8540 return NULL; 8541 if (cb_priv->netdev == netdev) 8542 return cb_priv; 8543 } 8544 return NULL; 8545 } 8546 8547 static int 8548 ice_indr_setup_block_cb(enum tc_setup_type type, void *type_data, 8549 void *indr_priv) 8550 { 8551 struct ice_indr_block_priv *priv = indr_priv; 8552 struct ice_netdev_priv *np = priv->np; 8553 8554 switch (type) { 8555 case TC_SETUP_CLSFLOWER: 8556 return ice_setup_tc_cls_flower(np, priv->netdev, 8557 (struct flow_cls_offload *) 8558 type_data); 8559 default: 8560 return -EOPNOTSUPP; 8561 } 8562 } 8563 8564 static int 8565 ice_indr_setup_tc_block(struct net_device *netdev, struct Qdisc *sch, 8566 struct ice_netdev_priv *np, 8567 struct flow_block_offload *f, void *data, 8568 void (*cleanup)(struct flow_block_cb *block_cb)) 8569 { 8570 struct ice_indr_block_priv *indr_priv; 8571 struct flow_block_cb *block_cb; 8572 8573 if (!ice_is_tunnel_supported(netdev) && 8574 !(is_vlan_dev(netdev) && 8575 vlan_dev_real_dev(netdev) == np->vsi->netdev)) 8576 return -EOPNOTSUPP; 8577 8578 if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) 8579 return -EOPNOTSUPP; 8580 8581 switch (f->command) { 8582 case FLOW_BLOCK_BIND: 8583 indr_priv = ice_indr_block_priv_lookup(np, netdev); 8584 if (indr_priv) 8585 return -EEXIST; 8586 8587 indr_priv = kzalloc(sizeof(*indr_priv), GFP_KERNEL); 8588 if (!indr_priv) 8589 return -ENOMEM; 8590 8591 indr_priv->netdev = netdev; 8592 indr_priv->np = np; 8593 list_add(&indr_priv->list, &np->tc_indr_block_priv_list); 8594 8595 block_cb = 8596 flow_indr_block_cb_alloc(ice_indr_setup_block_cb, 8597 indr_priv, indr_priv, 8598 ice_rep_indr_tc_block_unbind, 8599 f, netdev, 
sch, data, np, 8600 cleanup); 8601 8602 if (IS_ERR(block_cb)) { 8603 list_del(&indr_priv->list); 8604 kfree(indr_priv); 8605 return PTR_ERR(block_cb); 8606 } 8607 flow_block_cb_add(block_cb, f); 8608 list_add_tail(&block_cb->driver_list, &ice_block_cb_list); 8609 break; 8610 case FLOW_BLOCK_UNBIND: 8611 indr_priv = ice_indr_block_priv_lookup(np, netdev); 8612 if (!indr_priv) 8613 return -ENOENT; 8614 8615 block_cb = flow_block_cb_lookup(f->block, 8616 ice_indr_setup_block_cb, 8617 indr_priv); 8618 if (!block_cb) 8619 return -ENOENT; 8620 8621 flow_indr_block_cb_remove(block_cb, f); 8622 8623 list_del(&block_cb->driver_list); 8624 break; 8625 default: 8626 return -EOPNOTSUPP; 8627 } 8628 return 0; 8629 } 8630 8631 static int 8632 ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, 8633 void *cb_priv, enum tc_setup_type type, void *type_data, 8634 void *data, 8635 void (*cleanup)(struct flow_block_cb *block_cb)) 8636 { 8637 switch (type) { 8638 case TC_SETUP_BLOCK: 8639 return ice_indr_setup_tc_block(netdev, sch, cb_priv, type_data, 8640 data, cleanup); 8641 8642 default: 8643 return -EOPNOTSUPP; 8644 } 8645 } 8646 8647 /** 8648 * ice_open - Called when a network interface becomes active 8649 * @netdev: network interface device structure 8650 * 8651 * The open entry point is called when a network interface is made 8652 * active by the system (IFF_UP). At this point all resources needed 8653 * for transmit and receive operations are allocated, the interrupt 8654 * handler is registered with the OS, the netdev watchdog is enabled, 8655 * and the stack is notified that the interface is ready. 8656 * 8657 * Returns 0 on success, negative value on failure 8658 */ 8659 int ice_open(struct net_device *netdev) 8660 { 8661 struct ice_netdev_priv *np = netdev_priv(netdev); 8662 struct ice_pf *pf = np->vsi->back; 8663 8664 if (ice_is_reset_in_progress(pf->state)) { 8665 netdev_err(netdev, "can't open net device while reset is in progress"); 8666 return -EBUSY; 8667 } 8668 8669 return ice_open_internal(netdev); 8670 } 8671 8672 /** 8673 * ice_open_internal - Called when a network interface becomes active 8674 * @netdev: network interface device structure 8675 * 8676 * Internal ice_open implementation. 
Should not be used directly except for ice_open and reset 8677 * handling routine 8678 * 8679 * Returns 0 on success, negative value on failure 8680 */ 8681 int ice_open_internal(struct net_device *netdev) 8682 { 8683 struct ice_netdev_priv *np = netdev_priv(netdev); 8684 struct ice_vsi *vsi = np->vsi; 8685 struct ice_pf *pf = vsi->back; 8686 struct ice_port_info *pi; 8687 int err; 8688 8689 if (test_bit(ICE_NEEDS_RESTART, pf->state)) { 8690 netdev_err(netdev, "driver needs to be unloaded and reloaded\n"); 8691 return -EIO; 8692 } 8693 8694 netif_carrier_off(netdev); 8695 8696 pi = vsi->port_info; 8697 err = ice_update_link_info(pi); 8698 if (err) { 8699 netdev_err(netdev, "Failed to get link info, error %d\n", err); 8700 return err; 8701 } 8702 8703 ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err); 8704 8705 /* Set PHY if there is media, otherwise, turn off PHY */ 8706 if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { 8707 clear_bit(ICE_FLAG_NO_MEDIA, pf->flags); 8708 if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) { 8709 err = ice_init_phy_user_cfg(pi); 8710 if (err) { 8711 netdev_err(netdev, "Failed to initialize PHY settings, error %d\n", 8712 err); 8713 return err; 8714 } 8715 } 8716 8717 err = ice_configure_phy(vsi); 8718 if (err) { 8719 netdev_err(netdev, "Failed to set physical link up, error %d\n", 8720 err); 8721 return err; 8722 } 8723 } else { 8724 set_bit(ICE_FLAG_NO_MEDIA, pf->flags); 8725 ice_set_link(vsi, false); 8726 } 8727 8728 err = ice_vsi_open(vsi); 8729 if (err) 8730 netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n", 8731 vsi->vsi_num, vsi->vsw->sw_id); 8732 8733 /* Update existing tunnels information */ 8734 udp_tunnel_get_rx_info(netdev); 8735 8736 return err; 8737 } 8738 8739 /** 8740 * ice_stop - Disables a network interface 8741 * @netdev: network interface device structure 8742 * 8743 * The stop entry point is called when an interface is de-activated by the OS, 8744 * and the netdevice enters the DOWN state. The hardware is still under the 8745 * driver's control, but the netdev interface is disabled. 8746 * 8747 * Returns success only - not allowed to fail 8748 */ 8749 int ice_stop(struct net_device *netdev) 8750 { 8751 struct ice_netdev_priv *np = netdev_priv(netdev); 8752 struct ice_vsi *vsi = np->vsi; 8753 struct ice_pf *pf = vsi->back; 8754 8755 if (ice_is_reset_in_progress(pf->state)) { 8756 netdev_err(netdev, "can't stop net device while reset is in progress"); 8757 return -EBUSY; 8758 } 8759 8760 ice_vsi_close(vsi); 8761 8762 return 0; 8763 } 8764 8765 /** 8766 * ice_features_check - Validate encapsulated packet conforms to limits 8767 * @skb: skb buffer 8768 * @netdev: This port's netdev 8769 * @features: Offload features that the stack believes apply 8770 */ 8771 static netdev_features_t 8772 ice_features_check(struct sk_buff *skb, 8773 struct net_device __always_unused *netdev, 8774 netdev_features_t features) 8775 { 8776 bool gso = skb_is_gso(skb); 8777 size_t len; 8778 8779 /* No point in doing any of this if neither checksum nor GSO are 8780 * being requested for this frame. We can rule out both by just 8781 * checking for CHECKSUM_PARTIAL 8782 */ 8783 if (skb->ip_summed != CHECKSUM_PARTIAL) 8784 return features; 8785 8786 /* We cannot support GSO if the MSS is going to be less than 8787 * 64 bytes. If it is then we need to drop support for GSO. 
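 * (ICE_TXD_CTX_MIN_MSS in the check below encodes this 64 byte minimum.)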
8788 */ 8789 if (gso && (skb_shinfo(skb)->gso_size < ICE_TXD_CTX_MIN_MSS)) 8790 features &= ~NETIF_F_GSO_MASK; 8791 8792 len = skb_network_offset(skb); 8793 if (len > ICE_TXD_MACLEN_MAX || len & 0x1) 8794 goto out_rm_features; 8795 8796 len = skb_network_header_len(skb); 8797 if (len > ICE_TXD_IPLEN_MAX || len & 0x1) 8798 goto out_rm_features; 8799 8800 if (skb->encapsulation) { 8801 /* this must work for VXLAN frames AND IPIP/SIT frames, and in 8802 * the case of IPIP frames, the transport header pointer is 8803 * after the inner header! So check to make sure that this 8804 * is a GRE or UDP_TUNNEL frame before doing that math. 8805 */ 8806 if (gso && (skb_shinfo(skb)->gso_type & 8807 (SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL))) { 8808 len = skb_inner_network_header(skb) - 8809 skb_transport_header(skb); 8810 if (len > ICE_TXD_L4LEN_MAX || len & 0x1) 8811 goto out_rm_features; 8812 } 8813 8814 len = skb_inner_network_header_len(skb); 8815 if (len > ICE_TXD_IPLEN_MAX || len & 0x1) 8816 goto out_rm_features; 8817 } 8818 8819 return features; 8820 out_rm_features: 8821 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); 8822 } 8823 8824 static const struct net_device_ops ice_netdev_safe_mode_ops = { 8825 .ndo_open = ice_open, 8826 .ndo_stop = ice_stop, 8827 .ndo_start_xmit = ice_start_xmit, 8828 .ndo_set_mac_address = ice_set_mac_address, 8829 .ndo_validate_addr = eth_validate_addr, 8830 .ndo_change_mtu = ice_change_mtu, 8831 .ndo_get_stats64 = ice_get_stats64, 8832 .ndo_tx_timeout = ice_tx_timeout, 8833 .ndo_bpf = ice_xdp_safe_mode, 8834 }; 8835 8836 static const struct net_device_ops ice_netdev_ops = { 8837 .ndo_open = ice_open, 8838 .ndo_stop = ice_stop, 8839 .ndo_start_xmit = ice_start_xmit, 8840 .ndo_select_queue = ice_select_queue, 8841 .ndo_features_check = ice_features_check, 8842 .ndo_fix_features = ice_fix_features, 8843 .ndo_set_rx_mode = ice_set_rx_mode, 8844 .ndo_set_mac_address = ice_set_mac_address, 8845 .ndo_validate_addr = eth_validate_addr, 8846 .ndo_change_mtu = ice_change_mtu, 8847 .ndo_get_stats64 = ice_get_stats64, 8848 .ndo_set_tx_maxrate = ice_set_tx_maxrate, 8849 .ndo_eth_ioctl = ice_eth_ioctl, 8850 .ndo_set_vf_spoofchk = ice_set_vf_spoofchk, 8851 .ndo_set_vf_mac = ice_set_vf_mac, 8852 .ndo_get_vf_config = ice_get_vf_cfg, 8853 .ndo_set_vf_trust = ice_set_vf_trust, 8854 .ndo_set_vf_vlan = ice_set_vf_port_vlan, 8855 .ndo_set_vf_link_state = ice_set_vf_link_state, 8856 .ndo_get_vf_stats = ice_get_vf_stats, 8857 .ndo_set_vf_rate = ice_set_vf_bw, 8858 .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid, 8859 .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid, 8860 .ndo_setup_tc = ice_setup_tc, 8861 .ndo_set_features = ice_set_features, 8862 .ndo_bridge_getlink = ice_bridge_getlink, 8863 .ndo_bridge_setlink = ice_bridge_setlink, 8864 .ndo_fdb_add = ice_fdb_add, 8865 .ndo_fdb_del = ice_fdb_del, 8866 #ifdef CONFIG_RFS_ACCEL 8867 .ndo_rx_flow_steer = ice_rx_flow_steer, 8868 #endif 8869 .ndo_tx_timeout = ice_tx_timeout, 8870 .ndo_bpf = ice_xdp, 8871 .ndo_xdp_xmit = ice_xdp_xmit, 8872 .ndo_xsk_wakeup = ice_xsk_wakeup, 8873 }; 8874
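/* Usage sketch (not part of the driver): a minimal, hypothetical example of
 * how a net_device is typically bound to the ice_netdev_ops table defined
 * above. The real driver performs this binding in its own netdev setup path;
 * the function name example_cfg_netdev() and its error handling below are
 * illustrative assumptions only.
 *
 *	static int example_cfg_netdev(struct ice_vsi *vsi)
 *	{
 *		struct net_device *netdev;
 *
 *		netdev = alloc_etherdev_mqs(sizeof(struct ice_netdev_priv),
 *					    vsi->alloc_txq, vsi->alloc_rxq);
 *		if (!netdev)
 *			return -ENOMEM;
 *
 *		netdev->netdev_ops = &ice_netdev_ops;
 *		vsi->netdev = netdev;
 *		return register_netdev(netdev);
 *	}
 */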