// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <net/sch_generic.h>
#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"

#define GVE_DEFAULT_RX_COPYBREAK (256)

#define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION "1.0.0"
#define GVE_VERSION_PREFIX "GVE-"

// Minimum amount of time between queue kicks in msec (10 seconds)
#define MIN_TX_TIMEOUT_GAP (1000 * 10)

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

static int gve_verify_driver_compatibility(struct gve_priv *priv)
{
	int err;
	struct gve_driver_info *driver_info;
	dma_addr_t driver_info_bus;

	driver_info = dma_alloc_coherent(&priv->pdev->dev,
					 sizeof(struct gve_driver_info),
					 &driver_info_bus, GFP_KERNEL);
	if (!driver_info)
		return -ENOMEM;

	*driver_info = (struct gve_driver_info) {
		.os_type = 1, /* Linux */
		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
		.os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
		.os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
		.driver_capability_flags = {
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
		},
	};
	strscpy(driver_info->os_version_str1, utsname()->release,
		sizeof(driver_info->os_version_str1));
	strscpy(driver_info->os_version_str2, utsname()->version,
		sizeof(driver_info->os_version_str2));

	err = gve_adminq_verify_driver_compatibility(priv,
						     sizeof(struct gve_driver_info),
						     driver_info_bus);

	/* It's ok if the device doesn't support this */
	if (err == -EOPNOTSUPP)
		err = 0;

	dma_free_coherent(&priv->pdev->dev,
			  sizeof(struct gve_driver_info),
			  driver_info, driver_info_bus);
	return err;
}

static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (gve_is_gqi(priv))
		return gve_tx(skb, dev);
	else
		return gve_tx_dqo(skb, dev);
}

static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	u64 packets, bytes;
	int num_tx_queues;
	int ring;

	num_tx_queues = gve_num_tx_queues(priv);
	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				packets = priv->rx[ring].rpackets;
				bytes = priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
			s->rx_packets += packets;
			s->rx_bytes += bytes;
		}
	}
	if (priv->tx) {
		for (ring = 0; ring < num_tx_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->tx[ring].statss);
				packets = priv->tx[ring].pkt_done;
				bytes = priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
						       start));
			s->tx_packets += packets;
			s->tx_bytes += bytes;
		}
	}
}

static int gve_alloc_counter_array(struct gve_priv *priv)
{
	priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
	if (!priv->counter_array)
		return -ENOMEM;

	return 0;
}

static void gve_free_counter_array(struct gve_priv *priv)
{
	if (!priv->counter_array)
		return;

	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
	priv->counter_array = NULL;
}

/* NIC requests to report stats */
static void gve_stats_report_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     stats_report_task);
	if (gve_get_do_report_stats(priv)) {
		gve_handle_report_stats(priv);
		gve_clear_do_report_stats(priv);
	}
}

static void gve_stats_report_schedule(struct gve_priv *priv)
{
	if (!gve_get_probe_in_progress(priv) &&
	    !gve_get_reset_in_progress(priv)) {
		gve_set_do_report_stats(priv);
		queue_work(priv->gve_wq, &priv->stats_report_task);
	}
}

static void gve_stats_report_timer(struct timer_list *t)
{
	struct gve_priv *priv = from_timer(priv, t, stats_report_timer);

	mod_timer(&priv->stats_report_timer,
		  round_jiffies(jiffies +
		  msecs_to_jiffies(priv->stats_report_timer_period)));
	gve_stats_report_schedule(priv);
}

static int gve_alloc_stats_report(struct gve_priv *priv)
{
	int tx_stats_num, rx_stats_num;

	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
		       gve_num_tx_queues(priv);
	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
		       priv->rx_cfg.num_queues;
	priv->stats_report_len = struct_size(priv->stats_report, stats,
					     size_add(tx_stats_num, rx_stats_num));
	priv->stats_report =
		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
				   &priv->stats_report_bus, GFP_KERNEL);
	if (!priv->stats_report)
		return -ENOMEM;
	/* Set up timer for the report-stats task */
	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
	return 0;
}

static void gve_free_stats_report(struct gve_priv *priv)
{
	if (!priv->stats_report)
		return;

	del_timer_sync(&priv->stats_report_timer);
	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
			  priv->stats_report, priv->stats_report_bus);
	priv->stats_report = NULL;
}

static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
{
	struct gve_priv *priv = arg;

	queue_work(priv->gve_wq, &priv->service_task);
	return IRQ_HANDLED;
}

static irqreturn_t gve_intr(int irq, void *arg)
{
	struct gve_notify_block *block = arg;
	struct gve_priv *priv = block->priv;

	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}

static irqreturn_t gve_intr_dqo(int irq, void *arg)
{
	struct gve_notify_block *block = arg;

	/* Interrupts are automatically masked */
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}

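/* NAPI poll for the GQI queue format. gve_intr() above masked the vector
 * before scheduling NAPI; once work completes, this poll ACKs the IRQ via
 * the doorbell, issues a full barrier, and re-checks for TX/RX work that
 * may have raced with the ACK before leaving interrupts enabled.
 */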
static int gve_napi_poll(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block;
	__be32 __iomem *irq_doorbell;
	bool reschedule = false;
	struct gve_priv *priv;
	int work_done = 0;

	block = container_of(napi, struct gve_notify_block, napi);
	priv = block->priv;

	if (block->tx) {
		if (block->tx->q_num < priv->tx_cfg.num_queues)
			reschedule |= gve_tx_poll(block, budget);
		else if (budget)
			reschedule |= gve_xdp_poll(block, budget);
	}

	if (!budget)
		return 0;

	if (block->rx) {
		work_done = gve_rx_poll(block, budget);
		reschedule |= work_done == budget;
	}

	if (reschedule)
		return budget;

	/* Complete processing - don't unmask irq if busy polling is enabled */
	if (likely(napi_complete_done(napi, work_done))) {
		irq_doorbell = gve_irq_doorbell(priv, block);
		iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);

		/* Ensure IRQ ACK is visible before we check pending work.
		 * If queue had issued updates, it would be truly visible.
		 */
		mb();

		if (block->tx)
			reschedule |= gve_tx_clean_pending(priv, block->tx);
		if (block->rx)
			reschedule |= gve_rx_work_pending(block->rx);

		if (reschedule && napi_reschedule(napi))
			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
	}
	return work_done;
}

static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block =
		container_of(napi, struct gve_notify_block, napi);
	struct gve_priv *priv = block->priv;
	bool reschedule = false;
	int work_done = 0;

	if (block->tx)
		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);

	if (!budget)
		return 0;

	if (block->rx) {
		work_done = gve_rx_poll_dqo(block, budget);
		reschedule |= work_done == budget;
	}

	if (reschedule)
		return budget;

	if (likely(napi_complete_done(napi, work_done))) {
		/* Enable interrupts again.
		 *
		 * We don't need to repoll afterwards because HW supports the
		 * PCI MSI-X PBA feature.
		 *
		 * Another interrupt would be triggered if a new event came in
		 * since the last one.
		 */
		gve_write_irq_doorbell_dqo(priv, block,
			GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
	}

	return work_done;
}

static int gve_alloc_notify_blocks(struct gve_priv *priv)
{
	int num_vecs_requested = priv->num_ntfy_blks + 1;
	unsigned int active_cpus;
	int vecs_enabled;
	int i, j;
	int err;

	priv->msix_vectors = kvcalloc(num_vecs_requested,
				      sizeof(*priv->msix_vectors), GFP_KERNEL);
	if (!priv->msix_vectors)
		return -ENOMEM;
	for (i = 0; i < num_vecs_requested; i++)
		priv->msix_vectors[i].entry = i;
	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
					     GVE_MIN_MSIX, num_vecs_requested);
	if (vecs_enabled < 0) {
		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
			GVE_MIN_MSIX, vecs_enabled);
		err = vecs_enabled;
		goto abort_with_msix_vectors;
	}
	if (vecs_enabled != num_vecs_requested) {
		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
		int vecs_per_type = new_num_ntfy_blks / 2;
		int vecs_left = new_num_ntfy_blks % 2;

		priv->num_ntfy_blks = new_num_ntfy_blks;
		priv->mgmt_msix_idx = priv->num_ntfy_blks;
		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
						vecs_per_type);
		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
						vecs_per_type + vecs_left);
		dev_err(&priv->pdev->dev,
			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
			vecs_enabled, priv->tx_cfg.max_queues,
			priv->rx_cfg.max_queues);
		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	}
	/* Half the notification blocks go to TX and half to RX */
	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());

	/* Setup Management Vector  - the last vector */
	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
		 pci_name(priv->pdev));
	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
	if (err) {
		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
		goto abort_with_msix_enabled;
	}
	priv->irq_db_indices =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_ntfy_blks *
				   sizeof(*priv->irq_db_indices),
				   &priv->irq_db_indices_bus, GFP_KERNEL);
	if (!priv->irq_db_indices) {
		err = -ENOMEM;
		goto abort_with_mgmt_vector;
	}

	priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
				     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
	if (!priv->ntfy_blocks) {
		err = -ENOMEM;
		goto abort_with_irq_db_indices;
	}

	/* Setup the other blocks - the first n-1 vectors */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
			 i, pci_name(priv->pdev));
		block->priv = priv;
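		/* The top-half handler depends on the queue format:
		 * gve_intr() masks the vector and lets NAPI unmask it, while
		 * gve_intr_dqo() relies on hardware auto-masking (see the
		 * handlers above).
		 */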
		err = request_irq(priv->msix_vectors[msix_idx].vector,
				  gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
				  0, block->name, block);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to receive msix vector %d\n", i);
			goto abort_with_some_ntfy_blocks;
		}
		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      get_cpu_mask(i % active_cpus));
		block->irq_db_index = &priv->irq_db_indices[i].index;
	}
	return 0;
abort_with_some_ntfy_blocks:
	for (j = 0; j < i; j++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[j];
		int msix_idx = j;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	kvfree(priv->ntfy_blocks);
	priv->ntfy_blocks = NULL;
abort_with_irq_db_indices:
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->irq_db_indices),
			  priv->irq_db_indices, priv->irq_db_indices_bus);
	priv->irq_db_indices = NULL;
abort_with_mgmt_vector:
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
abort_with_msix_enabled:
	pci_disable_msix(priv->pdev);
abort_with_msix_vectors:
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
	return err;
}

static void gve_free_notify_blocks(struct gve_priv *priv)
{
	int i;

	if (!priv->msix_vectors)
		return;

	/* Free the irqs */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
	kvfree(priv->ntfy_blocks);
	priv->ntfy_blocks = NULL;
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->irq_db_indices),
			  priv->irq_db_indices, priv->irq_db_indices_bus);
	priv->irq_db_indices = NULL;
	pci_disable_msix(priv->pdev);
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
}

static int gve_setup_device_resources(struct gve_priv *priv)
{
	int err;

	err = gve_alloc_counter_array(priv);
	if (err)
		return err;
	err = gve_alloc_notify_blocks(priv);
	if (err)
		goto abort_with_counter;
	err = gve_alloc_stats_report(priv);
	if (err)
		goto abort_with_ntfy_blocks;
	err = gve_adminq_configure_device_resources(priv,
						    priv->counter_array_bus,
						    priv->num_event_counters,
						    priv->irq_db_indices_bus,
						    priv->num_ntfy_blks);
	if (unlikely(err)) {
		dev_err(&priv->pdev->dev,
			"could not setup device_resources: err=%d\n", err);
		err = -ENXIO;
		goto abort_with_stats_report;
	}

	if (!gve_is_gqi(priv)) {
		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
					       GFP_KERNEL);
		if (!priv->ptype_lut_dqo) {
			err = -ENOMEM;
			goto abort_with_stats_report;
		}
		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to get ptype map: err=%d\n", err);
			goto abort_with_ptype_lut;
		}
	}

	err = gve_adminq_report_stats(priv, priv->stats_report_len,
				      priv->stats_report_bus,
				      GVE_STATS_REPORT_TIMER_PERIOD);
	if (err)
		dev_err(&priv->pdev->dev,
			"Failed to report stats: err=%d\n", err);
	gve_set_device_resources_ok(priv);
	return 0;

abort_with_ptype_lut:
	kvfree(priv->ptype_lut_dqo);
	priv->ptype_lut_dqo = NULL;
abort_with_stats_report:
	gve_free_stats_report(priv);
abort_with_ntfy_blocks:
	gve_free_notify_blocks(priv);
abort_with_counter:
	gve_free_counter_array(priv);

	return err;
}

static void gve_trigger_reset(struct gve_priv *priv);

static void gve_teardown_device_resources(struct gve_priv *priv)
{
	int err;

	/* Tell device its resources are being freed */
	if (gve_get_device_resources_ok(priv)) {
		/* detach the stats report */
		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to detach stats report: err=%d\n", err);
			gve_trigger_reset(priv);
		}
		err = gve_adminq_deconfigure_device_resources(priv);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Could not deconfigure device resources: err=%d\n",
				err);
			gve_trigger_reset(priv);
		}
	}

	kvfree(priv->ptype_lut_dqo);
	priv->ptype_lut_dqo = NULL;

	gve_free_counter_array(priv);
	gve_free_notify_blocks(priv);
	gve_free_stats_report(priv);
	gve_clear_device_resources_ok(priv);
}

static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
			 int (*gve_poll)(struct napi_struct *, int))
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_add(priv->dev, &block->napi, gve_poll);
}

static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_del(&block->napi);
}

static int gve_register_xdp_qpls(struct gve_priv *priv)
{
	int start_id;
	int err;
	int i;

	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}
	return 0;
}

static int gve_register_qpls(struct gve_priv *priv)
{
	int start_id;
	int err;
	int i;

	start_id = gve_tx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}

	start_id = gve_rx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}
	return 0;
}

static int gve_unregister_xdp_qpls(struct gve_priv *priv)
{
	int start_id;
	int err;
	int i;

	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}
	return 0;
}

static int gve_unregister_qpls(struct gve_priv *priv)
{
	int start_id;
	int err;
	int i;

	start_id = gve_tx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}

	start_id = gve_rx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}
	return 0;
}

static int gve_create_xdp_rings(struct gve_priv *priv)
{
	int err;

	err = gve_adminq_create_tx_queues(priv,
					  gve_xdp_tx_start_queue_id(priv),
					  priv->num_xdp_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
			  priv->num_xdp_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
		  priv->num_xdp_queues);

	return 0;
}

static int gve_create_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int err;
	int i;

	err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
			  num_tx_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
		  num_tx_queues);

	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
			  priv->rx_cfg.num_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
		  priv->rx_cfg.num_queues);

	if (gve_is_gqi(priv)) {
		/* Rx data ring has been prefilled with packet buffers at queue
		 * allocation time.
		 *
		 * Write the doorbell to provide descriptor slots and packet
		 * buffers to the NIC.
		 */
		for (i = 0; i < priv->rx_cfg.num_queues; i++)
			gve_rx_write_doorbell(priv, &priv->rx[i]);
	} else {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			/* Post buffers and ring doorbell. */
			gve_rx_post_buffers_dqo(&priv->rx[i]);
		}
	}

	return 0;
}

static void add_napi_init_xdp_sync_stats(struct gve_priv *priv,
					 int (*napi_poll)(struct napi_struct *napi,
							  int budget))
{
	int start_id = gve_xdp_tx_start_queue_id(priv);
	int i;

	/* Add xdp tx napi & init sync stats*/
	for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->tx[i].statss);
		priv->tx[i].ntfy_id = ntfy_idx;
		gve_add_napi(priv, ntfy_idx, napi_poll);
	}
}

static void add_napi_init_sync_stats(struct gve_priv *priv,
				     int (*napi_poll)(struct napi_struct *napi,
						      int budget))
{
	int i;

	/* Add tx napi & init sync stats*/
	for (i = 0; i < gve_num_tx_queues(priv); i++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->tx[i].statss);
		priv->tx[i].ntfy_id = ntfy_idx;
		gve_add_napi(priv, ntfy_idx, napi_poll);
	}
	/* Add rx napi  & init sync stats*/
	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->rx[i].statss);
		priv->rx[i].ntfy_id = ntfy_idx;
		gve_add_napi(priv, ntfy_idx, napi_poll);
	}
}

static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings)
{
	if (gve_is_gqi(priv)) {
		gve_tx_free_rings_gqi(priv, start_id, num_rings);
	} else {
		gve_tx_free_rings_dqo(priv);
	}
}

static int gve_alloc_xdp_rings(struct gve_priv *priv)
{
	int start_id;
	int err = 0;

	if (!priv->num_xdp_queues)
		return 0;

	start_id = gve_xdp_tx_start_queue_id(priv);
	err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues);
	if (err)
		return err;
	add_napi_init_xdp_sync_stats(priv, gve_napi_poll);

	return 0;
}

static int gve_alloc_rings(struct gve_priv *priv)
{
	int err;

	/* Setup tx rings */
	priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx),
			    GFP_KERNEL);
	if (!priv->tx)
		return -ENOMEM;

	if (gve_is_gqi(priv))
		err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv));
	else
		err = gve_tx_alloc_rings_dqo(priv);
	if (err)
		goto free_tx;

	/* Setup rx rings */
	priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx),
			    GFP_KERNEL);
	if (!priv->rx) {
		err = -ENOMEM;
		goto free_tx_queue;
	}

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_rings(priv);
	else
		err = gve_rx_alloc_rings_dqo(priv);
	if (err)
		goto free_rx;

	if (gve_is_gqi(priv))
		add_napi_init_sync_stats(priv, gve_napi_poll);
	else
		add_napi_init_sync_stats(priv, gve_napi_poll_dqo);

	return 0;

free_rx:
	kvfree(priv->rx);
	priv->rx = NULL;
free_tx_queue:
	gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv));
free_tx:
	kvfree(priv->tx);
	priv->tx = NULL;
	return err;
}

static int gve_destroy_xdp_rings(struct gve_priv *priv)
{
	int start_id;
	int err;

	start_id = gve_xdp_tx_start_queue_id(priv);
	err = gve_adminq_destroy_tx_queues(priv,
					   start_id,
					   priv->num_xdp_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy XDP queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");

	return 0;
}

static int gve_destroy_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int err;

	err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy tx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy rx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
	return 0;
}

static void gve_rx_free_rings(struct gve_priv *priv)
{
	if (gve_is_gqi(priv))
		gve_rx_free_rings_gqi(priv);
	else
		gve_rx_free_rings_dqo(priv);
}

static void gve_free_xdp_rings(struct gve_priv *priv)
{
	int ntfy_idx, start_id;
	int i;

	start_id = gve_xdp_tx_start_queue_id(priv);
	if (priv->tx) {
		for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_tx_free_rings(priv, start_id, priv->num_xdp_queues);
	}
}

static void gve_free_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int ntfy_idx;
	int i;

	if (priv->tx) {
		for (i = 0; i < num_tx_queues; i++) {
			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_tx_free_rings(priv, 0, num_tx_queues);
		kvfree(priv->tx);
		priv->tx = NULL;
	}
	if (priv->rx) {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_rx_free_rings(priv);
		kvfree(priv->rx);
		priv->rx = NULL;
	}
}

int gve_alloc_page(struct gve_priv *priv, struct device *dev,
		   struct page **page, dma_addr_t *dma,
		   enum dma_data_direction dir, gfp_t gfp_flags)
{
	*page = alloc_page(gfp_flags);
	if (!*page) {
		priv->page_alloc_fail++;
		return -ENOMEM;
	}
	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
	if (dma_mapping_error(dev, *dma)) {
		priv->dma_mapping_error++;
		put_page(*page);
		return -ENOMEM;
	}
	return 0;
}

static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
				     int pages)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int err;
	int i;

	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
		netif_err(priv, drv, priv->dev,
			  "Reached max number of registered pages %llu > %llu\n",
			  pages + priv->num_registered_pages,
			  priv->max_registered_pages);
		return -EINVAL;
	}

	qpl->id = id;
	qpl->num_entries = 0;
	qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->pages)
		return -ENOMEM;
	qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->page_buses)
		return -ENOMEM;

	for (i = 0; i < pages; i++) {
		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
				     &qpl->page_buses[i],
				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
		/* caller handles clean up */
		if (err)
			return -ENOMEM;
		qpl->num_entries++;
	}
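	/* Every page in this QPL is now allocated and DMA-mapped, so count
	 * them against the max_registered_pages limit checked above.
	 */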
	priv->num_registered_pages += pages;

	return 0;
}

void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
		   enum dma_data_direction dir)
{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
	if (page)
		put_page(page);
}

static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int i;

	if (!qpl->pages)
		return;
	if (!qpl->page_buses)
		goto free_pages;

	for (i = 0; i < qpl->num_entries; i++)
		gve_free_page(&priv->pdev->dev, qpl->pages[i],
			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));

	kvfree(qpl->page_buses);
	qpl->page_buses = NULL;
free_pages:
	kvfree(qpl->pages);
	qpl->pages = NULL;
	priv->num_registered_pages -= qpl->num_entries;
}

static int gve_alloc_xdp_qpls(struct gve_priv *priv)
{
	int start_id;
	int i, j;
	int err;

	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->tx_pages_per_qpl);
		if (err)
			goto free_qpls;
	}

	return 0;

free_qpls:
	for (j = start_id; j <= i; j++)
		gve_free_queue_page_list(priv, j);
	return err;
}

static int gve_alloc_qpls(struct gve_priv *priv)
{
	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
	int page_count;
	int start_id;
	int i, j;
	int err;

	if (!gve_is_qpl(priv))
		return 0;

	priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
	if (!priv->qpls)
		return -ENOMEM;

	start_id = gve_tx_start_qpl_id(priv);
	page_count = priv->tx_pages_per_qpl;
	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						page_count);
		if (err)
			goto free_qpls;
	}

	start_id = gve_rx_start_qpl_id(priv);

	/* For GQI_QPL number of pages allocated have 1:1 relationship with
	 * number of descriptors. For DQO, number of pages required are
	 * more than descriptors (because of out of order completions).
	 */
	page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
		priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						page_count);
		if (err)
			goto free_qpls;
	}

	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) *
				     sizeof(unsigned long) * BITS_PER_BYTE;
	priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
					    sizeof(unsigned long), GFP_KERNEL);
	if (!priv->qpl_cfg.qpl_id_map) {
		err = -ENOMEM;
		goto free_qpls;
	}

	return 0;

free_qpls:
	for (j = 0; j <= i; j++)
		gve_free_queue_page_list(priv, j);
	kvfree(priv->qpls);
	priv->qpls = NULL;
	return err;
}

static void gve_free_xdp_qpls(struct gve_priv *priv)
{
	int start_id;
	int i;

	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++)
		gve_free_queue_page_list(priv, i);
}

static void gve_free_qpls(struct gve_priv *priv)
{
	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
	int i;

	if (!priv->qpls)
		return;

	kvfree(priv->qpl_cfg.qpl_id_map);
	priv->qpl_cfg.qpl_id_map = NULL;

	for (i = 0; i < max_queues; i++)
		gve_free_queue_page_list(priv, i);

	kvfree(priv->qpls);
	priv->qpls = NULL;
}

/* Use this to schedule a reset when the device is capable of continuing
 * to handle other requests in its current state. If it is not, do a reset
 * in thread instead.
 */
void gve_schedule_reset(struct gve_priv *priv)
{
	gve_set_do_reset(priv);
	queue_work(priv->gve_wq, &priv->service_task);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);

static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
{
	struct napi_struct *napi;
	struct gve_rx_ring *rx;
	int err = 0;
	int i, j;
	u32 tx_qid;

	if (!priv->num_xdp_queues)
		return 0;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		rx = &priv->rx[i];
		napi = &priv->ntfy_blocks[rx->ntfy_id].napi;

		err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
				       napi->napi_id);
		if (err)
			goto err;
		err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
						 MEM_TYPE_PAGE_SHARED, NULL);
		if (err)
			goto err;
		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
		if (rx->xsk_pool) {
			err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
					       napi->napi_id);
			if (err)
				goto err;
			err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
							 MEM_TYPE_XSK_BUFF_POOL, NULL);
			if (err)
				goto err;
			xsk_pool_set_rxq_info(rx->xsk_pool,
					      &rx->xsk_rxq);
		}
	}

	for (i = 0; i < priv->num_xdp_queues; i++) {
		tx_qid = gve_xdp_tx_queue_id(priv, i);
		priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
	}
	return 0;

err:
	for (j = i; j >= 0; j--) {
		rx = &priv->rx[j];
		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
			xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
			xdp_rxq_info_unreg(&rx->xsk_rxq);
	}
	return err;
}

static void gve_unreg_xdp_info(struct gve_priv *priv)
{
	int i, tx_qid;

	if (!priv->num_xdp_queues)
		return;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		struct gve_rx_ring *rx = &priv->rx[i];

		xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (rx->xsk_pool) {
			xdp_rxq_info_unreg(&rx->xsk_rxq);
			rx->xsk_pool = NULL;
		}
	}

	for (i = 0; i < priv->num_xdp_queues; i++) {
		tx_qid = gve_xdp_tx_queue_id(priv, i);
		priv->tx[tx_qid].xsk_pool = NULL;
	}
}

static void gve_drain_page_cache(struct gve_priv *priv)
{
	struct page_frag_cache *nc;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		nc = &priv->rx[i].page_cache;
		if (nc->va) {
			__page_frag_cache_drain(virt_to_page(nc->va),
						nc->pagecnt_bias);
			nc->va = NULL;
		}
	}
}

static int gve_open(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	if (priv->xdp_prog)
		priv->num_xdp_queues = priv->rx_cfg.num_queues;
	else
		priv->num_xdp_queues = 0;

	err = gve_alloc_qpls(priv);
	if (err)
		return err;

	err = gve_alloc_rings(priv);
	if (err)
		goto free_qpls;

	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
	if (err)
		goto free_rings;
	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
	if (err)
		goto free_rings;

	err = gve_reg_xdp_info(priv, dev);
	if (err)
		goto free_rings;

	err = gve_register_qpls(priv);
	if (err)
		goto reset;

	if (!gve_is_gqi(priv)) {
		/* Hard code this for now. This may be tuned in the future for
		 * performance.
		 */
		priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
	}
	err = gve_create_rings(priv);
	if (err)
		goto reset;

	gve_set_device_rings_ok(priv);

	if (gve_get_report_stats(priv))
		mod_timer(&priv->stats_report_timer,
			  round_jiffies(jiffies +
				msecs_to_jiffies(priv->stats_report_timer_period)));

	gve_turnup(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	priv->interface_up_cnt++;
	return 0;

free_rings:
	gve_free_rings(priv);
free_qpls:
	gve_free_qpls(priv);
	return err;

reset:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	/* if this fails there is nothing we can do so just ignore the return */
	gve_reset_recovery(priv, false);
	/* return the original error */
	return err;
}

static int gve_close(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	netif_carrier_off(dev);
	if (gve_get_device_rings_ok(priv)) {
		gve_turndown(priv);
		gve_drain_page_cache(priv);
		err = gve_destroy_rings(priv);
		if (err)
			goto err;
		err = gve_unregister_qpls(priv);
		if (err)
			goto err;
		gve_clear_device_rings_ok(priv);
	}
	del_timer_sync(&priv->stats_report_timer);

	gve_unreg_xdp_info(priv);
	gve_free_rings(priv);
	gve_free_qpls(priv);
	priv->interface_down_cnt++;
	return 0;

err:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	return gve_reset_recovery(priv, false);
}

static int gve_remove_xdp_queues(struct gve_priv *priv)
{
	int err;

	err = gve_destroy_xdp_rings(priv);
	if (err)
		return err;

	err = gve_unregister_xdp_qpls(priv);
	if (err)
		return err;

	gve_unreg_xdp_info(priv);
	gve_free_xdp_rings(priv);
	gve_free_xdp_qpls(priv);
	priv->num_xdp_queues = 0;
	return 0;
}

static int gve_add_xdp_queues(struct gve_priv *priv)
{
	int err;

	priv->num_xdp_queues = priv->tx_cfg.num_queues;

	err = gve_alloc_xdp_qpls(priv);
	if (err)
		goto err;

	err = gve_alloc_xdp_rings(priv);
	if (err)
		goto free_xdp_qpls;

	err = gve_reg_xdp_info(priv, priv->dev);
	if (err)
		goto free_xdp_rings;

	err = gve_register_xdp_qpls(priv);
	if (err)
		goto free_xdp_rings;

	err = gve_create_xdp_rings(priv);
	if (err)
		goto free_xdp_rings;

	return 0;

free_xdp_rings:
	gve_free_xdp_rings(priv);
free_xdp_qpls:
	gve_free_xdp_qpls(priv);
err:
	priv->num_xdp_queues = 0;
	return err;
}

static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
{
	if (!gve_get_napi_enabled(priv))
		return;

	if (link_status == netif_carrier_ok(priv->dev))
		return;

	if (link_status) {
		netdev_info(priv->dev, "Device link is up.\n");
		netif_carrier_on(priv->dev);
	} else {
		netdev_info(priv->dev, "Device link is down.\n");
		netif_carrier_off(priv->dev);
	}
}

static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
		       struct netlink_ext_ack *extack)
{
	struct bpf_prog *old_prog;
	int err = 0;
	u32 status;

	old_prog = READ_ONCE(priv->xdp_prog);
	if (!netif_carrier_ok(priv->dev)) {
		WRITE_ONCE(priv->xdp_prog, prog);
		if (old_prog)
			bpf_prog_put(old_prog);
		return 0;
	}

	gve_turndown(priv);
	if (!old_prog && prog) {
		// Allocate XDP TX queues if an XDP program is
		// being installed
		err = gve_add_xdp_queues(priv);
		if (err)
			goto out;
	} else if (old_prog && !prog) {
		// Remove XDP TX queues if an XDP program is
		// being uninstalled
		err = gve_remove_xdp_queues(priv);
		if (err)
			goto out;
	}
	WRITE_ONCE(priv->xdp_prog, prog);
	if (old_prog)
		bpf_prog_put(old_prog);

out:
	gve_turnup(priv);
	status = ioread32be(&priv->reg_bar0->device_status);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
	return err;
}

static int gve_xsk_pool_enable(struct net_device *dev,
			       struct xsk_buff_pool *pool,
			       u16 qid)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct napi_struct *napi;
	struct gve_rx_ring *rx;
	int tx_qid;
	int err;

	if (qid >= priv->rx_cfg.num_queues) {
		dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
		return -EINVAL;
	}
	if (xsk_pool_get_rx_frame_size(pool) <
	    priv->dev->max_mtu + sizeof(struct ethhdr)) {
		dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
		return -EINVAL;
	}

	err = xsk_pool_dma_map(pool, &priv->pdev->dev,
			       DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	if (err)
		return err;

	/* If XDP prog is not installed, return */
	if (!priv->xdp_prog)
		return 0;

	rx = &priv->rx[qid];
	napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
	err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
	if (err)
		goto err;

	err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
					 MEM_TYPE_XSK_BUFF_POOL, NULL);
	if (err)
		goto err;

	xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
	rx->xsk_pool = pool;

	tx_qid = gve_xdp_tx_queue_id(priv, qid);
	priv->tx[tx_qid].xsk_pool = pool;

	return 0;
err:
	if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
		xdp_rxq_info_unreg(&rx->xsk_rxq);

	xsk_pool_dma_unmap(pool,
			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	return err;
}

static int gve_xsk_pool_disable(struct net_device *dev,
				u16 qid)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct napi_struct *napi_rx;
	struct napi_struct *napi_tx;
	struct xsk_buff_pool *pool;
	int tx_qid;

	pool = xsk_get_pool_from_qid(dev, qid);
	if (!pool)
		return -EINVAL;
	if (qid >= priv->rx_cfg.num_queues)
		return -EINVAL;

	/* If XDP prog is not installed, unmap DMA and return */
	if (!priv->xdp_prog)
		goto done;

	tx_qid = gve_xdp_tx_queue_id(priv, qid);
	if (!netif_running(dev)) {
		priv->rx[qid].xsk_pool = NULL;
		xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
		priv->tx[tx_qid].xsk_pool = NULL;
		goto done;
	}

	napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
	napi_disable(napi_rx); /* make sure current rx poll is done */

	napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
	napi_disable(napi_tx); /* make sure current tx poll is done */

	priv->rx[qid].xsk_pool = NULL;
	xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
	priv->tx[tx_qid].xsk_pool = NULL;
	smp_mb(); /* Make sure it is visible to the workers on datapath */

	napi_enable(napi_rx);
	if (gve_rx_work_pending(&priv->rx[qid]))
		napi_schedule(napi_rx);

	napi_enable(napi_tx);
	if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
		napi_schedule(napi_tx);

done:
	xsk_pool_dma_unmap(pool,
			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	return 0;
}

static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
	struct gve_priv *priv = netdev_priv(dev);
	int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);

	if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
		return -EINVAL;

	if (flags & XDP_WAKEUP_TX) {
		struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
		struct napi_struct *napi =
			&priv->ntfy_blocks[tx->ntfy_id].napi;

		if (!napi_if_scheduled_mark_missed(napi)) {
			/* Call local_bh_enable to trigger SoftIRQ processing */
			local_bh_disable();
			napi_schedule(napi);
			local_bh_enable();
		}

		tx->xdp_xsk_wakeup++;
	}

	return 0;
}

static int verify_xdp_configuration(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (dev->features & NETIF_F_LRO) {
		netdev_warn(dev, "XDP is not supported when LRO is on.\n");
		return -EOPNOTSUPP;
	}

	if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
		netdev_warn(dev, "XDP is not supported in mode %d.\n",
			    priv->queue_format);
		return -EOPNOTSUPP;
	}

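	/* The bound below requires the MTU plus the Ethernet header and
	 * GVE_RX_PAD to fit within half a page, i.e. within a single GQI
	 * receive buffer, so XDP always sees one contiguous packet.
	 */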
	if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) {
		netdev_warn(dev, "XDP is not supported for mtu %d.\n",
			    dev->mtu);
		return -EOPNOTSUPP;
	}

	if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
	    (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
		netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
			    priv->rx_cfg.num_queues,
			    priv->tx_cfg.num_queues,
			    priv->tx_cfg.max_queues);
		return -EINVAL;
	}
	return 0;
}

static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	err = verify_xdp_configuration(dev);
	if (err)
		return err;
	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return gve_set_xdp(priv, xdp->prog, xdp->extack);
	case XDP_SETUP_XSK_POOL:
		if (xdp->xsk.pool)
			return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
		else
			return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
	default:
		return -EINVAL;
	}
}

int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_queue_config new_rx_config,
		      struct gve_queue_config new_tx_config)
{
	int err;

	if (netif_carrier_ok(priv->dev)) {
		/* To make this process as simple as possible we teardown the
		 * device, set the new configuration, and then bring the device
		 * up again.
		 */
		err = gve_close(priv->dev);
		/* we have already tried to reset in close,
		 * just fail at this point
		 */
		if (err)
			return err;
		priv->tx_cfg = new_tx_config;
		priv->rx_cfg = new_rx_config;

		err = gve_open(priv->dev);
		if (err)
			goto err;

		return 0;
	}
	/* Set the config for the next up. */
	priv->tx_cfg = new_tx_config;
	priv->rx_cfg = new_rx_config;

	return 0;
err:
	netif_err(priv, drv, priv->dev,
		  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

static void gve_turndown(struct gve_priv *priv)
{
	int idx;

	if (netif_carrier_ok(priv->dev))
		netif_carrier_off(priv->dev);

	if (!gve_get_napi_enabled(priv))
		return;

	/* Disable napi to prevent more work from coming in */
	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}

	/* Stop tx queues */
	netif_tx_disable(priv->dev);

	gve_clear_napi_enabled(priv);
	gve_clear_report_stats(priv);
}

static void gve_turnup(struct gve_priv *priv)
{
	int idx;

	/* Start the tx queues */
	netif_tx_start_all_queues(priv->dev);

	/* Enable napi and unmask interrupts for all queues */
	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		if (gve_is_gqi(priv)) {
			iowrite32be(0, gve_irq_doorbell(priv, block));
		} else {
			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->tx_coalesce_usecs);
		}
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		if (gve_is_gqi(priv)) {
			iowrite32be(0, gve_irq_doorbell(priv, block));
		} else {
			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->rx_coalesce_usecs);
		}
	}

	gve_set_napi_enabled(priv);
}

static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct gve_notify_block *block;
	struct gve_tx_ring *tx = NULL;
	struct gve_priv *priv;
	u32 last_nic_done;
	u32 current_time;
	u32 ntfy_idx;

	netdev_info(dev, "Timeout on tx queue, %d", txqueue);
	priv = netdev_priv(dev);
	if (txqueue > priv->tx_cfg.num_queues)
		goto reset;

	ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
	if (ntfy_idx >= priv->num_ntfy_blks)
		goto reset;

	block = &priv->ntfy_blocks[ntfy_idx];
	tx = block->tx;

	current_time = jiffies_to_msecs(jiffies);
	if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
		goto reset;

	/* Check to see if there are missed completions, which will allow us to
	 * kick the queue.
	 */
	last_nic_done = gve_tx_load_event_counter(priv, tx);
	if (last_nic_done - tx->done) {
		netdev_info(dev, "Kicking queue %d", txqueue);
		iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
		napi_schedule(&block->napi);
		tx->last_kick_msec = current_time;
		goto out;
	} // Else reset.
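	/* Reached by falling through when there are no missed completions to
	 * recover, or via the gotos above (bad queue index or a kick issued
	 * too recently); in all cases schedule a full device reset.
	 */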

reset:
	gve_schedule_reset(priv);

out:
	if (tx)
		tx->queue_timeout++;
	priv->tx_timeo_cnt++;
}

static int gve_set_features(struct net_device *netdev,
			    netdev_features_t features)
{
	const netdev_features_t orig_features = netdev->features;
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
		netdev->features ^= NETIF_F_LRO;
		if (netif_carrier_ok(netdev)) {
			/* To make this process as simple as possible we
			 * teardown the device, set the new configuration,
			 * and then bring the device up again.
			 */
			err = gve_close(netdev);
			/* We have already tried to reset in close, just fail
			 * at this point.
			 */
			if (err)
				goto err;

			err = gve_open(netdev);
			if (err)
				goto err;
		}
	}

	return 0;
err:
	/* Reverts the change on error. */
	netdev->features = orig_features;
	netif_err(priv, drv, netdev,
		  "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
	return err;
}

static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit		=	gve_start_xmit,
	.ndo_open		=	gve_open,
	.ndo_stop		=	gve_close,
	.ndo_get_stats64	=	gve_get_stats,
	.ndo_tx_timeout		=	gve_tx_timeout,
	.ndo_set_features	=	gve_set_features,
	.ndo_bpf		=	gve_xdp,
	.ndo_xdp_xmit		=	gve_xdp_xmit,
	.ndo_xsk_wakeup		=	gve_xsk_wakeup,
};

static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
		priv->stats_report_trigger_cnt++;
		gve_set_do_report_stats(priv);
	}
}

static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		gve_reset(priv, false);
		rtnl_unlock();
	}
}

void gve_handle_report_stats(struct gve_priv *priv)
{
	struct stats *stats = priv->stats_report->stats;
	int idx, stats_idx = 0;
	unsigned int start = 0;
	u64 tx_bytes;

	if (!gve_get_report_stats(priv))
		return;

	be64_add_cpu(&priv->stats_report->written_count, 1);
	/* tx stats */
	if (priv->tx) {
		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
			u32 last_completion = 0;
			u32 tx_frames = 0;

			/* DQO doesn't currently support these metrics. */
			if (gve_is_gqi(priv)) {
				last_completion = priv->tx[idx].done;
				tx_frames = priv->tx[idx].req;
			}

			do {
				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
				tx_bytes = priv->tx[idx].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_WAKE_CNT),
				.value = cpu_to_be64(priv->tx[idx].wake_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_STOP_CNT),
				.value = cpu_to_be64(priv->tx[idx].stop_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
				.value = cpu_to_be64(tx_frames),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_BYTES_SENT),
				.value = cpu_to_be64(tx_bytes),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
				.value = cpu_to_be64(last_completion),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
	/* rx stats */
	if (priv->rx) {
		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
				.value = cpu_to_be64(priv->rx[0].fill_cnt),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
}

/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     service_task);
	u32 status = ioread32be(&priv->reg_bar0->device_status);

	gve_handle_status(priv, status);

	gve_handle_reset(priv);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}

static void gve_set_netdev_xdp_features(struct gve_priv *priv)
{
	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
		priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
		priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
		priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
		priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
	} else {
		priv->dev->xdp_features = 0;
	}
}

static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	err = gve_verify_driver_compatibility(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not verify driver compatibility: err=%d\n", err);
		goto err;
	}

	if (skip_describe_device)
		goto setup_device;

	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	/* Big TCP is only supported on DQ */
	if (!gve_is_gqi(priv))
		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);

	priv->num_registered_pages = 0;
	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

	if (!gve_is_gqi(priv)) {
		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
	}

setup_device:
	gve_set_netdev_xdp_features(priv);
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}

static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}

static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}

static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

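/*
 * Reset flow, as sketched from the functions below: gve_reset() either closes
 * the device cleanly (attempt_teardown) or turns the queues down and releases
 * the admin queue to force a device reset, then rebuilds state through
 * gve_reset_recovery(), i.e. gve_init_priv(skip_describe_device = true)
 * followed by gve_open() if the interface was previously up.
 */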
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_carrier_ok(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to tear down normally, just turn the device
	 * down and reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}

static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}

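/*
 * Probe sequence, summarized from the body below as a reading aid: enable the
 * PCI device, map the register and doorbell BARs, write the driver version to
 * the device, read the advertised max queue counts, allocate the netdev and
 * private state, run gve_init_priv(), then register the netdev and kick the
 * service task.
 */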
static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	err = pci_request_regions(pdev, gve_driver_name);
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		err = -ENOMEM;
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;

	/* Set default and supported features.
	 *
	 * Features might be set in other locations as well (such as
	 * `gve_adminq_describe_device`).
	 */
	dev->hw_features = NETIF_F_HIGHDMA;
	dev->hw_features |= NETIF_F_SG;
	dev->hw_features |= NETIF_F_HW_CSUM;
	dev->hw_features |= NETIF_F_TSO;
	dev->hw_features |= NETIF_F_TSO6;
	dev->hw_features |= NETIF_F_TSO_ECN;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_RXHASH;
	dev->features = dev->hw_features;
	dev->watchdog_timeo = 5 * HZ;
	dev->min_mtu = ETH_MIN_MTU;
	netif_carrier_off(dev);

	priv = netdev_priv(dev);
	priv->dev = dev;
	priv->pdev = pdev;
	priv->msg_enable = DEFAULT_MSG_LEVEL;
	priv->reg_bar0 = reg_bar;
	priv->db_bar2 = db_bar;
	priv->service_task_flags = 0x0;
	priv->state_flags = 0x0;
	priv->ethtool_flags = 0x0;

	gve_set_probe_in_progress(priv);
	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue");
		err = -ENOMEM;
		goto abort_with_netdev;
	}
	INIT_WORK(&priv->service_task, gve_service_task);
	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
	priv->tx_cfg.max_queues = max_tx_queues;
	priv->rx_cfg.max_queues = max_rx_queues;

	err = gve_init_priv(priv, false);
	if (err)
		goto abort_with_wq;

	err = register_netdev(dev);
	if (err)
		goto abort_with_gve_init;

	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
	gve_clear_probe_in_progress(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	return 0;

abort_with_gve_init:
	gve_teardown_priv_resources(priv);

abort_with_wq:
	destroy_workqueue(priv->gve_wq);

abort_with_netdev:
	free_netdev(dev);

abort_with_db_bar:
	pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
	pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
	pci_release_regions(pdev);

abort_with_enabled:
	pci_disable_device(pdev);
	return err;
}

static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}

static void gve_shutdown(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_carrier_ok(priv->dev);

	rtnl_lock();
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close; if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	rtnl_unlock();
}

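/*
 * Legacy PCI power-management callbacks (only built with CONFIG_PM). As
 * implemented below, suspend mirrors the shutdown path above (close if the
 * interface was up, otherwise just tear down) and records whether the
 * interface was up, while resume rebuilds the device via gve_reset_recovery().
 */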
#ifdef CONFIG_PM
static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_carrier_ok(priv->dev);

	priv->suspend_cnt++;
	rtnl_lock();
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close; if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	priv->up_before_suspend = was_up;
	rtnl_unlock();
	return 0;
}

static int gve_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	priv->resume_cnt++;
	rtnl_lock();
	err = gve_reset_recovery(priv, priv->up_before_suspend);
	rtnl_unlock();
	return err;
}
#endif /* CONFIG_PM */

static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gve_driver = {
	.name = gve_driver_name,
	.id_table = gve_id_table,
	.probe = gve_probe,
	.remove = gve_remove,
	.shutdown = gve_shutdown,
#ifdef CONFIG_PM
	.suspend = gve_suspend,
	.resume = gve_resume,
#endif
};

module_pci_driver(gve_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("Google Virtual NIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);
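/*
 * Usage sketch (illustrative only, not part of the driver): on a VM exposing
 * the gVNIC PCI device, the driver binds automatically once loaded, e.g.:
 *
 *   modprobe gve
 *   ethtool -i eth0    # reports "driver: gve" if eth0 is backed by gVNIC
 *
 * The interface name eth0 is only an example.
 */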