1 // SPDX-License-Identifier: (GPL-2.0 OR MIT) 2 /* Google virtual Ethernet (gve) driver 3 * 4 * Copyright (C) 2015-2021 Google, Inc. 5 */ 6 7 #include <linux/bpf.h> 8 #include <linux/cpumask.h> 9 #include <linux/etherdevice.h> 10 #include <linux/filter.h> 11 #include <linux/interrupt.h> 12 #include <linux/module.h> 13 #include <linux/pci.h> 14 #include <linux/sched.h> 15 #include <linux/timer.h> 16 #include <linux/workqueue.h> 17 #include <linux/utsname.h> 18 #include <linux/version.h> 19 #include <net/sch_generic.h> 20 #include <net/xdp_sock_drv.h> 21 #include "gve.h" 22 #include "gve_dqo.h" 23 #include "gve_adminq.h" 24 #include "gve_register.h" 25 26 #define GVE_DEFAULT_RX_COPYBREAK (256) 27 28 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK) 29 #define GVE_VERSION "1.0.0" 30 #define GVE_VERSION_PREFIX "GVE-" 31 32 // Minimum amount of time between queue kicks in msec (10 seconds) 33 #define MIN_TX_TIMEOUT_GAP (1000 * 10) 34 35 const char gve_version_str[] = GVE_VERSION; 36 static const char gve_version_prefix[] = GVE_VERSION_PREFIX; 37 38 static int gve_verify_driver_compatibility(struct gve_priv *priv) 39 { 40 int err; 41 struct gve_driver_info *driver_info; 42 dma_addr_t driver_info_bus; 43 44 driver_info = dma_alloc_coherent(&priv->pdev->dev, 45 sizeof(struct gve_driver_info), 46 &driver_info_bus, GFP_KERNEL); 47 if (!driver_info) 48 return -ENOMEM; 49 50 *driver_info = (struct gve_driver_info) { 51 .os_type = 1, /* Linux */ 52 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR), 53 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL), 54 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL), 55 .driver_capability_flags = { 56 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1), 57 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2), 58 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3), 59 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4), 60 }, 61 }; 62 strscpy(driver_info->os_version_str1, utsname()->release, 63 sizeof(driver_info->os_version_str1)); 64 strscpy(driver_info->os_version_str2, utsname()->version, 65 sizeof(driver_info->os_version_str2)); 66 67 err = gve_adminq_verify_driver_compatibility(priv, 68 sizeof(struct gve_driver_info), 69 driver_info_bus); 70 71 /* It's ok if the device doesn't support this */ 72 if (err == -EOPNOTSUPP) 73 err = 0; 74 75 dma_free_coherent(&priv->pdev->dev, 76 sizeof(struct gve_driver_info), 77 driver_info, driver_info_bus); 78 return err; 79 } 80 81 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev) 82 { 83 struct gve_priv *priv = netdev_priv(dev); 84 85 if (gve_is_gqi(priv)) 86 return gve_tx(skb, dev); 87 else 88 return gve_tx_dqo(skb, dev); 89 } 90 91 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) 92 { 93 struct gve_priv *priv = netdev_priv(dev); 94 unsigned int start; 95 u64 packets, bytes; 96 int num_tx_queues; 97 int ring; 98 99 num_tx_queues = gve_num_tx_queues(priv); 100 if (priv->rx) { 101 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { 102 do { 103 start = 104 u64_stats_fetch_begin(&priv->rx[ring].statss); 105 packets = priv->rx[ring].rpackets; 106 bytes = priv->rx[ring].rbytes; 107 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 108 start)); 109 s->rx_packets += packets; 110 s->rx_bytes += bytes; 111 } 112 } 113 if (priv->tx) { 114 for (ring = 0; ring < num_tx_queues; ring++) { 115 do { 116 start = 117 u64_stats_fetch_begin(&priv->tx[ring].statss); 118 packets = priv->tx[ring].pkt_done; 119 bytes = priv->tx[ring].bytes_done; 120 } while (u64_stats_fetch_retry(&priv->tx[ring].statss, 121 start)); 122 s->tx_packets += packets; 123 s->tx_bytes += bytes; 124 } 125 } 126 } 127 128 static int gve_alloc_counter_array(struct gve_priv *priv) 129 { 130 priv->counter_array = 131 dma_alloc_coherent(&priv->pdev->dev, 132 priv->num_event_counters * 133 sizeof(*priv->counter_array), 134 &priv->counter_array_bus, GFP_KERNEL); 135 if (!priv->counter_array) 136 return -ENOMEM; 137 138 return 0; 139 } 140 141 static void gve_free_counter_array(struct gve_priv *priv) 142 { 143 if (!priv->counter_array) 144 return; 145 146 dma_free_coherent(&priv->pdev->dev, 147 priv->num_event_counters * 148 sizeof(*priv->counter_array), 149 priv->counter_array, priv->counter_array_bus); 150 priv->counter_array = NULL; 151 } 152 153 /* NIC requests to report stats */ 154 static void gve_stats_report_task(struct work_struct *work) 155 { 156 struct gve_priv *priv = container_of(work, struct gve_priv, 157 stats_report_task); 158 if (gve_get_do_report_stats(priv)) { 159 gve_handle_report_stats(priv); 160 gve_clear_do_report_stats(priv); 161 } 162 } 163 164 static void gve_stats_report_schedule(struct gve_priv *priv) 165 { 166 if (!gve_get_probe_in_progress(priv) && 167 !gve_get_reset_in_progress(priv)) { 168 gve_set_do_report_stats(priv); 169 queue_work(priv->gve_wq, &priv->stats_report_task); 170 } 171 } 172 173 static void gve_stats_report_timer(struct timer_list *t) 174 { 175 struct gve_priv *priv = from_timer(priv, t, stats_report_timer); 176 177 mod_timer(&priv->stats_report_timer, 178 round_jiffies(jiffies + 179 msecs_to_jiffies(priv->stats_report_timer_period))); 180 gve_stats_report_schedule(priv); 181 } 182 183 static int gve_alloc_stats_report(struct gve_priv *priv) 184 { 185 int tx_stats_num, rx_stats_num; 186 187 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * 188 gve_num_tx_queues(priv); 189 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 190 priv->rx_cfg.num_queues; 191 priv->stats_report_len = struct_size(priv->stats_report, stats, 192 tx_stats_num + rx_stats_num); 193 priv->stats_report = 194 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, 195 &priv->stats_report_bus, GFP_KERNEL); 196 if (!priv->stats_report) 197 return -ENOMEM; 198 /* Set up timer for the report-stats task */ 199 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); 200 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; 201 return 0; 202 } 203 204 static void gve_free_stats_report(struct gve_priv *priv) 205 { 206 if (!priv->stats_report) 207 return; 208 209 del_timer_sync(&priv->stats_report_timer); 210 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, 211 priv->stats_report, priv->stats_report_bus); 212 priv->stats_report = NULL; 213 } 214 215 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 216 { 217 struct gve_priv *priv = arg; 218 219 queue_work(priv->gve_wq, &priv->service_task); 220 return IRQ_HANDLED; 221 } 222 223 static irqreturn_t gve_intr(int irq, void *arg) 224 { 225 struct gve_notify_block *block = arg; 226 struct gve_priv *priv = block->priv; 227 228 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 229 napi_schedule_irqoff(&block->napi); 230 return IRQ_HANDLED; 231 } 232 233 static irqreturn_t gve_intr_dqo(int irq, void *arg) 234 { 235 struct gve_notify_block *block = arg; 236 237 /* Interrupts are automatically masked */ 238 napi_schedule_irqoff(&block->napi); 239 return IRQ_HANDLED; 240 } 241 242 static int gve_napi_poll(struct napi_struct *napi, int budget) 243 { 244 struct gve_notify_block *block; 245 __be32 __iomem *irq_doorbell; 246 bool reschedule = false; 247 struct gve_priv *priv; 248 int work_done = 0; 249 250 block = container_of(napi, struct gve_notify_block, napi); 251 priv = block->priv; 252 253 if (block->tx) { 254 if (block->tx->q_num < priv->tx_cfg.num_queues) 255 reschedule |= gve_tx_poll(block, budget); 256 else 257 reschedule |= gve_xdp_poll(block, budget); 258 } 259 260 if (block->rx) { 261 work_done = gve_rx_poll(block, budget); 262 reschedule |= work_done == budget; 263 } 264 265 if (reschedule) 266 return budget; 267 268 /* Complete processing - don't unmask irq if busy polling is enabled */ 269 if (likely(napi_complete_done(napi, work_done))) { 270 irq_doorbell = gve_irq_doorbell(priv, block); 271 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); 272 273 /* Ensure IRQ ACK is visible before we check pending work. 274 * If queue had issued updates, it would be truly visible. 275 */ 276 mb(); 277 278 if (block->tx) 279 reschedule |= gve_tx_clean_pending(priv, block->tx); 280 if (block->rx) 281 reschedule |= gve_rx_work_pending(block->rx); 282 283 if (reschedule && napi_reschedule(napi)) 284 iowrite32be(GVE_IRQ_MASK, irq_doorbell); 285 } 286 return work_done; 287 } 288 289 static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) 290 { 291 struct gve_notify_block *block = 292 container_of(napi, struct gve_notify_block, napi); 293 struct gve_priv *priv = block->priv; 294 bool reschedule = false; 295 int work_done = 0; 296 297 if (block->tx) 298 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); 299 300 if (block->rx) { 301 work_done = gve_rx_poll_dqo(block, budget); 302 reschedule |= work_done == budget; 303 } 304 305 if (reschedule) 306 return budget; 307 308 if (likely(napi_complete_done(napi, work_done))) { 309 /* Enable interrupts again. 310 * 311 * We don't need to repoll afterwards because HW supports the 312 * PCI MSI-X PBA feature. 313 * 314 * Another interrupt would be triggered if a new event came in 315 * since the last one. 316 */ 317 gve_write_irq_doorbell_dqo(priv, block, 318 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); 319 } 320 321 return work_done; 322 } 323 324 static int gve_alloc_notify_blocks(struct gve_priv *priv) 325 { 326 int num_vecs_requested = priv->num_ntfy_blks + 1; 327 unsigned int active_cpus; 328 int vecs_enabled; 329 int i, j; 330 int err; 331 332 priv->msix_vectors = kvcalloc(num_vecs_requested, 333 sizeof(*priv->msix_vectors), GFP_KERNEL); 334 if (!priv->msix_vectors) 335 return -ENOMEM; 336 for (i = 0; i < num_vecs_requested; i++) 337 priv->msix_vectors[i].entry = i; 338 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 339 GVE_MIN_MSIX, num_vecs_requested); 340 if (vecs_enabled < 0) { 341 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 342 GVE_MIN_MSIX, vecs_enabled); 343 err = vecs_enabled; 344 goto abort_with_msix_vectors; 345 } 346 if (vecs_enabled != num_vecs_requested) { 347 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 348 int vecs_per_type = new_num_ntfy_blks / 2; 349 int vecs_left = new_num_ntfy_blks % 2; 350 351 priv->num_ntfy_blks = new_num_ntfy_blks; 352 priv->mgmt_msix_idx = priv->num_ntfy_blks; 353 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 354 vecs_per_type); 355 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 356 vecs_per_type + vecs_left); 357 dev_err(&priv->pdev->dev, 358 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 359 vecs_enabled, priv->tx_cfg.max_queues, 360 priv->rx_cfg.max_queues); 361 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 362 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 363 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 364 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 365 } 366 /* Half the notification blocks go to TX and half to RX */ 367 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); 368 369 /* Setup Management Vector - the last vector */ 370 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", 371 pci_name(priv->pdev)); 372 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 373 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 374 if (err) { 375 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 376 goto abort_with_msix_enabled; 377 } 378 priv->irq_db_indices = 379 dma_alloc_coherent(&priv->pdev->dev, 380 priv->num_ntfy_blks * 381 sizeof(*priv->irq_db_indices), 382 &priv->irq_db_indices_bus, GFP_KERNEL); 383 if (!priv->irq_db_indices) { 384 err = -ENOMEM; 385 goto abort_with_mgmt_vector; 386 } 387 388 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * 389 sizeof(*priv->ntfy_blocks), GFP_KERNEL); 390 if (!priv->ntfy_blocks) { 391 err = -ENOMEM; 392 goto abort_with_irq_db_indices; 393 } 394 395 /* Setup the other blocks - the first n-1 vectors */ 396 for (i = 0; i < priv->num_ntfy_blks; i++) { 397 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 398 int msix_idx = i; 399 400 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", 401 i, pci_name(priv->pdev)); 402 block->priv = priv; 403 err = request_irq(priv->msix_vectors[msix_idx].vector, 404 gve_is_gqi(priv) ? gve_intr : gve_intr_dqo, 405 0, block->name, block); 406 if (err) { 407 dev_err(&priv->pdev->dev, 408 "Failed to receive msix vector %d\n", i); 409 goto abort_with_some_ntfy_blocks; 410 } 411 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 412 get_cpu_mask(i % active_cpus)); 413 block->irq_db_index = &priv->irq_db_indices[i].index; 414 } 415 return 0; 416 abort_with_some_ntfy_blocks: 417 for (j = 0; j < i; j++) { 418 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 419 int msix_idx = j; 420 421 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 422 NULL); 423 free_irq(priv->msix_vectors[msix_idx].vector, block); 424 } 425 kvfree(priv->ntfy_blocks); 426 priv->ntfy_blocks = NULL; 427 abort_with_irq_db_indices: 428 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 429 sizeof(*priv->irq_db_indices), 430 priv->irq_db_indices, priv->irq_db_indices_bus); 431 priv->irq_db_indices = NULL; 432 abort_with_mgmt_vector: 433 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 434 abort_with_msix_enabled: 435 pci_disable_msix(priv->pdev); 436 abort_with_msix_vectors: 437 kvfree(priv->msix_vectors); 438 priv->msix_vectors = NULL; 439 return err; 440 } 441 442 static void gve_free_notify_blocks(struct gve_priv *priv) 443 { 444 int i; 445 446 if (!priv->msix_vectors) 447 return; 448 449 /* Free the irqs */ 450 for (i = 0; i < priv->num_ntfy_blks; i++) { 451 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 452 int msix_idx = i; 453 454 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 455 NULL); 456 free_irq(priv->msix_vectors[msix_idx].vector, block); 457 } 458 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 459 kvfree(priv->ntfy_blocks); 460 priv->ntfy_blocks = NULL; 461 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 462 sizeof(*priv->irq_db_indices), 463 priv->irq_db_indices, priv->irq_db_indices_bus); 464 priv->irq_db_indices = NULL; 465 pci_disable_msix(priv->pdev); 466 kvfree(priv->msix_vectors); 467 priv->msix_vectors = NULL; 468 } 469 470 static int gve_setup_device_resources(struct gve_priv *priv) 471 { 472 int err; 473 474 err = gve_alloc_counter_array(priv); 475 if (err) 476 return err; 477 err = gve_alloc_notify_blocks(priv); 478 if (err) 479 goto abort_with_counter; 480 err = gve_alloc_stats_report(priv); 481 if (err) 482 goto abort_with_ntfy_blocks; 483 err = gve_adminq_configure_device_resources(priv, 484 priv->counter_array_bus, 485 priv->num_event_counters, 486 priv->irq_db_indices_bus, 487 priv->num_ntfy_blks); 488 if (unlikely(err)) { 489 dev_err(&priv->pdev->dev, 490 "could not setup device_resources: err=%d\n", err); 491 err = -ENXIO; 492 goto abort_with_stats_report; 493 } 494 495 if (priv->queue_format == GVE_DQO_RDA_FORMAT) { 496 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 497 GFP_KERNEL); 498 if (!priv->ptype_lut_dqo) { 499 err = -ENOMEM; 500 goto abort_with_stats_report; 501 } 502 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); 503 if (err) { 504 dev_err(&priv->pdev->dev, 505 "Failed to get ptype map: err=%d\n", err); 506 goto abort_with_ptype_lut; 507 } 508 } 509 510 err = gve_adminq_report_stats(priv, priv->stats_report_len, 511 priv->stats_report_bus, 512 GVE_STATS_REPORT_TIMER_PERIOD); 513 if (err) 514 dev_err(&priv->pdev->dev, 515 "Failed to report stats: err=%d\n", err); 516 gve_set_device_resources_ok(priv); 517 return 0; 518 519 abort_with_ptype_lut: 520 kvfree(priv->ptype_lut_dqo); 521 priv->ptype_lut_dqo = NULL; 522 abort_with_stats_report: 523 gve_free_stats_report(priv); 524 abort_with_ntfy_blocks: 525 gve_free_notify_blocks(priv); 526 abort_with_counter: 527 gve_free_counter_array(priv); 528 529 return err; 530 } 531 532 static void gve_trigger_reset(struct gve_priv *priv); 533 534 static void gve_teardown_device_resources(struct gve_priv *priv) 535 { 536 int err; 537 538 /* Tell device its resources are being freed */ 539 if (gve_get_device_resources_ok(priv)) { 540 /* detach the stats report */ 541 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 542 if (err) { 543 dev_err(&priv->pdev->dev, 544 "Failed to detach stats report: err=%d\n", err); 545 gve_trigger_reset(priv); 546 } 547 err = gve_adminq_deconfigure_device_resources(priv); 548 if (err) { 549 dev_err(&priv->pdev->dev, 550 "Could not deconfigure device resources: err=%d\n", 551 err); 552 gve_trigger_reset(priv); 553 } 554 } 555 556 kvfree(priv->ptype_lut_dqo); 557 priv->ptype_lut_dqo = NULL; 558 559 gve_free_counter_array(priv); 560 gve_free_notify_blocks(priv); 561 gve_free_stats_report(priv); 562 gve_clear_device_resources_ok(priv); 563 } 564 565 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx, 566 int (*gve_poll)(struct napi_struct *, int)) 567 { 568 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 569 570 netif_napi_add(priv->dev, &block->napi, gve_poll); 571 } 572 573 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) 574 { 575 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 576 577 netif_napi_del(&block->napi); 578 } 579 580 static int gve_register_xdp_qpls(struct gve_priv *priv) 581 { 582 int start_id; 583 int err; 584 int i; 585 586 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); 587 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 588 err = gve_adminq_register_page_list(priv, &priv->qpls[i]); 589 if (err) { 590 netif_err(priv, drv, priv->dev, 591 "failed to register queue page list %d\n", 592 priv->qpls[i].id); 593 /* This failure will trigger a reset - no need to clean 594 * up 595 */ 596 return err; 597 } 598 } 599 return 0; 600 } 601 602 static int gve_register_qpls(struct gve_priv *priv) 603 { 604 int start_id; 605 int err; 606 int i; 607 608 start_id = gve_tx_start_qpl_id(priv); 609 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { 610 err = gve_adminq_register_page_list(priv, &priv->qpls[i]); 611 if (err) { 612 netif_err(priv, drv, priv->dev, 613 "failed to register queue page list %d\n", 614 priv->qpls[i].id); 615 /* This failure will trigger a reset - no need to clean 616 * up 617 */ 618 return err; 619 } 620 } 621 622 start_id = gve_rx_start_qpl_id(priv); 623 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { 624 err = gve_adminq_register_page_list(priv, &priv->qpls[i]); 625 if (err) { 626 netif_err(priv, drv, priv->dev, 627 "failed to register queue page list %d\n", 628 priv->qpls[i].id); 629 /* This failure will trigger a reset - no need to clean 630 * up 631 */ 632 return err; 633 } 634 } 635 return 0; 636 } 637 638 static int gve_unregister_xdp_qpls(struct gve_priv *priv) 639 { 640 int start_id; 641 int err; 642 int i; 643 644 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); 645 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 646 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); 647 /* This failure will trigger a reset - no need to clean up */ 648 if (err) { 649 netif_err(priv, drv, priv->dev, 650 "Failed to unregister queue page list %d\n", 651 priv->qpls[i].id); 652 return err; 653 } 654 } 655 return 0; 656 } 657 658 static int gve_unregister_qpls(struct gve_priv *priv) 659 { 660 int start_id; 661 int err; 662 int i; 663 664 start_id = gve_tx_start_qpl_id(priv); 665 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { 666 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); 667 /* This failure will trigger a reset - no need to clean up */ 668 if (err) { 669 netif_err(priv, drv, priv->dev, 670 "Failed to unregister queue page list %d\n", 671 priv->qpls[i].id); 672 return err; 673 } 674 } 675 676 start_id = gve_rx_start_qpl_id(priv); 677 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { 678 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); 679 /* This failure will trigger a reset - no need to clean up */ 680 if (err) { 681 netif_err(priv, drv, priv->dev, 682 "Failed to unregister queue page list %d\n", 683 priv->qpls[i].id); 684 return err; 685 } 686 } 687 return 0; 688 } 689 690 static int gve_create_xdp_rings(struct gve_priv *priv) 691 { 692 int err; 693 694 err = gve_adminq_create_tx_queues(priv, 695 gve_xdp_tx_start_queue_id(priv), 696 priv->num_xdp_queues); 697 if (err) { 698 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n", 699 priv->num_xdp_queues); 700 /* This failure will trigger a reset - no need to clean 701 * up 702 */ 703 return err; 704 } 705 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n", 706 priv->num_xdp_queues); 707 708 return 0; 709 } 710 711 static int gve_create_rings(struct gve_priv *priv) 712 { 713 int num_tx_queues = gve_num_tx_queues(priv); 714 int err; 715 int i; 716 717 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); 718 if (err) { 719 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 720 num_tx_queues); 721 /* This failure will trigger a reset - no need to clean 722 * up 723 */ 724 return err; 725 } 726 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 727 num_tx_queues); 728 729 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 730 if (err) { 731 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", 732 priv->rx_cfg.num_queues); 733 /* This failure will trigger a reset - no need to clean 734 * up 735 */ 736 return err; 737 } 738 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", 739 priv->rx_cfg.num_queues); 740 741 if (gve_is_gqi(priv)) { 742 /* Rx data ring has been prefilled with packet buffers at queue 743 * allocation time. 744 * 745 * Write the doorbell to provide descriptor slots and packet 746 * buffers to the NIC. 747 */ 748 for (i = 0; i < priv->rx_cfg.num_queues; i++) 749 gve_rx_write_doorbell(priv, &priv->rx[i]); 750 } else { 751 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 752 /* Post buffers and ring doorbell. */ 753 gve_rx_post_buffers_dqo(&priv->rx[i]); 754 } 755 } 756 757 return 0; 758 } 759 760 static void add_napi_init_xdp_sync_stats(struct gve_priv *priv, 761 int (*napi_poll)(struct napi_struct *napi, 762 int budget)) 763 { 764 int start_id = gve_xdp_tx_start_queue_id(priv); 765 int i; 766 767 /* Add xdp tx napi & init sync stats*/ 768 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { 769 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 770 771 u64_stats_init(&priv->tx[i].statss); 772 priv->tx[i].ntfy_id = ntfy_idx; 773 gve_add_napi(priv, ntfy_idx, napi_poll); 774 } 775 } 776 777 static void add_napi_init_sync_stats(struct gve_priv *priv, 778 int (*napi_poll)(struct napi_struct *napi, 779 int budget)) 780 { 781 int i; 782 783 /* Add tx napi & init sync stats*/ 784 for (i = 0; i < gve_num_tx_queues(priv); i++) { 785 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 786 787 u64_stats_init(&priv->tx[i].statss); 788 priv->tx[i].ntfy_id = ntfy_idx; 789 gve_add_napi(priv, ntfy_idx, napi_poll); 790 } 791 /* Add rx napi & init sync stats*/ 792 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 793 int ntfy_idx = gve_rx_idx_to_ntfy(priv, i); 794 795 u64_stats_init(&priv->rx[i].statss); 796 priv->rx[i].ntfy_id = ntfy_idx; 797 gve_add_napi(priv, ntfy_idx, napi_poll); 798 } 799 } 800 801 static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings) 802 { 803 if (gve_is_gqi(priv)) { 804 gve_tx_free_rings_gqi(priv, start_id, num_rings); 805 } else { 806 gve_tx_free_rings_dqo(priv); 807 } 808 } 809 810 static int gve_alloc_xdp_rings(struct gve_priv *priv) 811 { 812 int start_id; 813 int err = 0; 814 815 if (!priv->num_xdp_queues) 816 return 0; 817 818 start_id = gve_xdp_tx_start_queue_id(priv); 819 err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues); 820 if (err) 821 return err; 822 add_napi_init_xdp_sync_stats(priv, gve_napi_poll); 823 824 return 0; 825 } 826 827 static int gve_alloc_rings(struct gve_priv *priv) 828 { 829 int err; 830 831 /* Setup tx rings */ 832 priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx), 833 GFP_KERNEL); 834 if (!priv->tx) 835 return -ENOMEM; 836 837 if (gve_is_gqi(priv)) 838 err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv)); 839 else 840 err = gve_tx_alloc_rings_dqo(priv); 841 if (err) 842 goto free_tx; 843 844 /* Setup rx rings */ 845 priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx), 846 GFP_KERNEL); 847 if (!priv->rx) { 848 err = -ENOMEM; 849 goto free_tx_queue; 850 } 851 852 if (gve_is_gqi(priv)) 853 err = gve_rx_alloc_rings(priv); 854 else 855 err = gve_rx_alloc_rings_dqo(priv); 856 if (err) 857 goto free_rx; 858 859 if (gve_is_gqi(priv)) 860 add_napi_init_sync_stats(priv, gve_napi_poll); 861 else 862 add_napi_init_sync_stats(priv, gve_napi_poll_dqo); 863 864 return 0; 865 866 free_rx: 867 kvfree(priv->rx); 868 priv->rx = NULL; 869 free_tx_queue: 870 gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv)); 871 free_tx: 872 kvfree(priv->tx); 873 priv->tx = NULL; 874 return err; 875 } 876 877 static int gve_destroy_xdp_rings(struct gve_priv *priv) 878 { 879 int start_id; 880 int err; 881 882 start_id = gve_xdp_tx_start_queue_id(priv); 883 err = gve_adminq_destroy_tx_queues(priv, 884 start_id, 885 priv->num_xdp_queues); 886 if (err) { 887 netif_err(priv, drv, priv->dev, 888 "failed to destroy XDP queues\n"); 889 /* This failure will trigger a reset - no need to clean up */ 890 return err; 891 } 892 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n"); 893 894 return 0; 895 } 896 897 static int gve_destroy_rings(struct gve_priv *priv) 898 { 899 int num_tx_queues = gve_num_tx_queues(priv); 900 int err; 901 902 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); 903 if (err) { 904 netif_err(priv, drv, priv->dev, 905 "failed to destroy tx queues\n"); 906 /* This failure will trigger a reset - no need to clean up */ 907 return err; 908 } 909 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); 910 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); 911 if (err) { 912 netif_err(priv, drv, priv->dev, 913 "failed to destroy rx queues\n"); 914 /* This failure will trigger a reset - no need to clean up */ 915 return err; 916 } 917 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); 918 return 0; 919 } 920 921 static void gve_rx_free_rings(struct gve_priv *priv) 922 { 923 if (gve_is_gqi(priv)) 924 gve_rx_free_rings_gqi(priv); 925 else 926 gve_rx_free_rings_dqo(priv); 927 } 928 929 static void gve_free_xdp_rings(struct gve_priv *priv) 930 { 931 int ntfy_idx, start_id; 932 int i; 933 934 start_id = gve_xdp_tx_start_queue_id(priv); 935 if (priv->tx) { 936 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { 937 ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 938 gve_remove_napi(priv, ntfy_idx); 939 } 940 gve_tx_free_rings(priv, start_id, priv->num_xdp_queues); 941 } 942 } 943 944 static void gve_free_rings(struct gve_priv *priv) 945 { 946 int num_tx_queues = gve_num_tx_queues(priv); 947 int ntfy_idx; 948 int i; 949 950 if (priv->tx) { 951 for (i = 0; i < num_tx_queues; i++) { 952 ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 953 gve_remove_napi(priv, ntfy_idx); 954 } 955 gve_tx_free_rings(priv, 0, num_tx_queues); 956 kvfree(priv->tx); 957 priv->tx = NULL; 958 } 959 if (priv->rx) { 960 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 961 ntfy_idx = gve_rx_idx_to_ntfy(priv, i); 962 gve_remove_napi(priv, ntfy_idx); 963 } 964 gve_rx_free_rings(priv); 965 kvfree(priv->rx); 966 priv->rx = NULL; 967 } 968 } 969 970 int gve_alloc_page(struct gve_priv *priv, struct device *dev, 971 struct page **page, dma_addr_t *dma, 972 enum dma_data_direction dir, gfp_t gfp_flags) 973 { 974 *page = alloc_page(gfp_flags); 975 if (!*page) { 976 priv->page_alloc_fail++; 977 return -ENOMEM; 978 } 979 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); 980 if (dma_mapping_error(dev, *dma)) { 981 priv->dma_mapping_error++; 982 put_page(*page); 983 return -ENOMEM; 984 } 985 return 0; 986 } 987 988 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id, 989 int pages) 990 { 991 struct gve_queue_page_list *qpl = &priv->qpls[id]; 992 int err; 993 int i; 994 995 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 996 netif_err(priv, drv, priv->dev, 997 "Reached max number of registered pages %llu > %llu\n", 998 pages + priv->num_registered_pages, 999 priv->max_registered_pages); 1000 return -EINVAL; 1001 } 1002 1003 qpl->id = id; 1004 qpl->num_entries = 0; 1005 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); 1006 /* caller handles clean up */ 1007 if (!qpl->pages) 1008 return -ENOMEM; 1009 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); 1010 /* caller handles clean up */ 1011 if (!qpl->page_buses) 1012 return -ENOMEM; 1013 1014 for (i = 0; i < pages; i++) { 1015 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], 1016 &qpl->page_buses[i], 1017 gve_qpl_dma_dir(priv, id), GFP_KERNEL); 1018 /* caller handles clean up */ 1019 if (err) 1020 return -ENOMEM; 1021 qpl->num_entries++; 1022 } 1023 priv->num_registered_pages += pages; 1024 1025 return 0; 1026 } 1027 1028 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, 1029 enum dma_data_direction dir) 1030 { 1031 if (!dma_mapping_error(dev, dma)) 1032 dma_unmap_page(dev, dma, PAGE_SIZE, dir); 1033 if (page) 1034 put_page(page); 1035 } 1036 1037 static void gve_free_queue_page_list(struct gve_priv *priv, u32 id) 1038 { 1039 struct gve_queue_page_list *qpl = &priv->qpls[id]; 1040 int i; 1041 1042 if (!qpl->pages) 1043 return; 1044 if (!qpl->page_buses) 1045 goto free_pages; 1046 1047 for (i = 0; i < qpl->num_entries; i++) 1048 gve_free_page(&priv->pdev->dev, qpl->pages[i], 1049 qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); 1050 1051 kvfree(qpl->page_buses); 1052 qpl->page_buses = NULL; 1053 free_pages: 1054 kvfree(qpl->pages); 1055 qpl->pages = NULL; 1056 priv->num_registered_pages -= qpl->num_entries; 1057 } 1058 1059 static int gve_alloc_xdp_qpls(struct gve_priv *priv) 1060 { 1061 int start_id; 1062 int i, j; 1063 int err; 1064 1065 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); 1066 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 1067 err = gve_alloc_queue_page_list(priv, i, 1068 priv->tx_pages_per_qpl); 1069 if (err) 1070 goto free_qpls; 1071 } 1072 1073 return 0; 1074 1075 free_qpls: 1076 for (j = start_id; j <= i; j++) 1077 gve_free_queue_page_list(priv, j); 1078 return err; 1079 } 1080 1081 static int gve_alloc_qpls(struct gve_priv *priv) 1082 { 1083 int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; 1084 int start_id; 1085 int i, j; 1086 int err; 1087 1088 if (priv->queue_format != GVE_GQI_QPL_FORMAT) 1089 return 0; 1090 1091 priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL); 1092 if (!priv->qpls) 1093 return -ENOMEM; 1094 1095 start_id = gve_tx_start_qpl_id(priv); 1096 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { 1097 err = gve_alloc_queue_page_list(priv, i, 1098 priv->tx_pages_per_qpl); 1099 if (err) 1100 goto free_qpls; 1101 } 1102 1103 start_id = gve_rx_start_qpl_id(priv); 1104 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { 1105 err = gve_alloc_queue_page_list(priv, i, 1106 priv->rx_data_slot_cnt); 1107 if (err) 1108 goto free_qpls; 1109 } 1110 1111 priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) * 1112 sizeof(unsigned long) * BITS_PER_BYTE; 1113 priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues), 1114 sizeof(unsigned long), GFP_KERNEL); 1115 if (!priv->qpl_cfg.qpl_id_map) { 1116 err = -ENOMEM; 1117 goto free_qpls; 1118 } 1119 1120 return 0; 1121 1122 free_qpls: 1123 for (j = 0; j <= i; j++) 1124 gve_free_queue_page_list(priv, j); 1125 kvfree(priv->qpls); 1126 priv->qpls = NULL; 1127 return err; 1128 } 1129 1130 static void gve_free_xdp_qpls(struct gve_priv *priv) 1131 { 1132 int start_id; 1133 int i; 1134 1135 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); 1136 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) 1137 gve_free_queue_page_list(priv, i); 1138 } 1139 1140 static void gve_free_qpls(struct gve_priv *priv) 1141 { 1142 int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; 1143 int i; 1144 1145 if (!priv->qpls) 1146 return; 1147 1148 kvfree(priv->qpl_cfg.qpl_id_map); 1149 priv->qpl_cfg.qpl_id_map = NULL; 1150 1151 for (i = 0; i < max_queues; i++) 1152 gve_free_queue_page_list(priv, i); 1153 1154 kvfree(priv->qpls); 1155 priv->qpls = NULL; 1156 } 1157 1158 /* Use this to schedule a reset when the device is capable of continuing 1159 * to handle other requests in its current state. If it is not, do a reset 1160 * in thread instead. 1161 */ 1162 void gve_schedule_reset(struct gve_priv *priv) 1163 { 1164 gve_set_do_reset(priv); 1165 queue_work(priv->gve_wq, &priv->service_task); 1166 } 1167 1168 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); 1169 static int gve_reset_recovery(struct gve_priv *priv, bool was_up); 1170 static void gve_turndown(struct gve_priv *priv); 1171 static void gve_turnup(struct gve_priv *priv); 1172 1173 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) 1174 { 1175 struct napi_struct *napi; 1176 struct gve_rx_ring *rx; 1177 int err = 0; 1178 int i, j; 1179 u32 tx_qid; 1180 1181 if (!priv->num_xdp_queues) 1182 return 0; 1183 1184 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1185 rx = &priv->rx[i]; 1186 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1187 1188 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i, 1189 napi->napi_id); 1190 if (err) 1191 goto err; 1192 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1193 MEM_TYPE_PAGE_SHARED, NULL); 1194 if (err) 1195 goto err; 1196 rx->xsk_pool = xsk_get_pool_from_qid(dev, i); 1197 if (rx->xsk_pool) { 1198 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, 1199 napi->napi_id); 1200 if (err) 1201 goto err; 1202 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1203 MEM_TYPE_XSK_BUFF_POOL, NULL); 1204 if (err) 1205 goto err; 1206 xsk_pool_set_rxq_info(rx->xsk_pool, 1207 &rx->xsk_rxq); 1208 } 1209 } 1210 1211 for (i = 0; i < priv->num_xdp_queues; i++) { 1212 tx_qid = gve_xdp_tx_queue_id(priv, i); 1213 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); 1214 } 1215 return 0; 1216 1217 err: 1218 for (j = i; j >= 0; j--) { 1219 rx = &priv->rx[j]; 1220 if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1221 xdp_rxq_info_unreg(&rx->xdp_rxq); 1222 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1223 xdp_rxq_info_unreg(&rx->xsk_rxq); 1224 } 1225 return err; 1226 } 1227 1228 static void gve_unreg_xdp_info(struct gve_priv *priv) 1229 { 1230 int i, tx_qid; 1231 1232 if (!priv->num_xdp_queues) 1233 return; 1234 1235 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1236 struct gve_rx_ring *rx = &priv->rx[i]; 1237 1238 xdp_rxq_info_unreg(&rx->xdp_rxq); 1239 if (rx->xsk_pool) { 1240 xdp_rxq_info_unreg(&rx->xsk_rxq); 1241 rx->xsk_pool = NULL; 1242 } 1243 } 1244 1245 for (i = 0; i < priv->num_xdp_queues; i++) { 1246 tx_qid = gve_xdp_tx_queue_id(priv, i); 1247 priv->tx[tx_qid].xsk_pool = NULL; 1248 } 1249 } 1250 1251 static void gve_drain_page_cache(struct gve_priv *priv) 1252 { 1253 struct page_frag_cache *nc; 1254 int i; 1255 1256 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1257 nc = &priv->rx[i].page_cache; 1258 if (nc->va) { 1259 __page_frag_cache_drain(virt_to_page(nc->va), 1260 nc->pagecnt_bias); 1261 nc->va = NULL; 1262 } 1263 } 1264 } 1265 1266 static int gve_open(struct net_device *dev) 1267 { 1268 struct gve_priv *priv = netdev_priv(dev); 1269 int err; 1270 1271 if (priv->xdp_prog) 1272 priv->num_xdp_queues = priv->rx_cfg.num_queues; 1273 else 1274 priv->num_xdp_queues = 0; 1275 1276 err = gve_alloc_qpls(priv); 1277 if (err) 1278 return err; 1279 1280 err = gve_alloc_rings(priv); 1281 if (err) 1282 goto free_qpls; 1283 1284 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 1285 if (err) 1286 goto free_rings; 1287 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 1288 if (err) 1289 goto free_rings; 1290 1291 err = gve_reg_xdp_info(priv, dev); 1292 if (err) 1293 goto free_rings; 1294 1295 err = gve_register_qpls(priv); 1296 if (err) 1297 goto reset; 1298 1299 if (!gve_is_gqi(priv)) { 1300 /* Hard code this for now. This may be tuned in the future for 1301 * performance. 1302 */ 1303 priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO; 1304 } 1305 err = gve_create_rings(priv); 1306 if (err) 1307 goto reset; 1308 1309 gve_set_device_rings_ok(priv); 1310 1311 if (gve_get_report_stats(priv)) 1312 mod_timer(&priv->stats_report_timer, 1313 round_jiffies(jiffies + 1314 msecs_to_jiffies(priv->stats_report_timer_period))); 1315 1316 gve_turnup(priv); 1317 queue_work(priv->gve_wq, &priv->service_task); 1318 priv->interface_up_cnt++; 1319 return 0; 1320 1321 free_rings: 1322 gve_free_rings(priv); 1323 free_qpls: 1324 gve_free_qpls(priv); 1325 return err; 1326 1327 reset: 1328 /* This must have been called from a reset due to the rtnl lock 1329 * so just return at this point. 1330 */ 1331 if (gve_get_reset_in_progress(priv)) 1332 return err; 1333 /* Otherwise reset before returning */ 1334 gve_reset_and_teardown(priv, true); 1335 /* if this fails there is nothing we can do so just ignore the return */ 1336 gve_reset_recovery(priv, false); 1337 /* return the original error */ 1338 return err; 1339 } 1340 1341 static int gve_close(struct net_device *dev) 1342 { 1343 struct gve_priv *priv = netdev_priv(dev); 1344 int err; 1345 1346 netif_carrier_off(dev); 1347 if (gve_get_device_rings_ok(priv)) { 1348 gve_turndown(priv); 1349 gve_drain_page_cache(priv); 1350 err = gve_destroy_rings(priv); 1351 if (err) 1352 goto err; 1353 err = gve_unregister_qpls(priv); 1354 if (err) 1355 goto err; 1356 gve_clear_device_rings_ok(priv); 1357 } 1358 del_timer_sync(&priv->stats_report_timer); 1359 1360 gve_unreg_xdp_info(priv); 1361 gve_free_rings(priv); 1362 gve_free_qpls(priv); 1363 priv->interface_down_cnt++; 1364 return 0; 1365 1366 err: 1367 /* This must have been called from a reset due to the rtnl lock 1368 * so just return at this point. 1369 */ 1370 if (gve_get_reset_in_progress(priv)) 1371 return err; 1372 /* Otherwise reset before returning */ 1373 gve_reset_and_teardown(priv, true); 1374 return gve_reset_recovery(priv, false); 1375 } 1376 1377 static int gve_remove_xdp_queues(struct gve_priv *priv) 1378 { 1379 int err; 1380 1381 err = gve_destroy_xdp_rings(priv); 1382 if (err) 1383 return err; 1384 1385 err = gve_unregister_xdp_qpls(priv); 1386 if (err) 1387 return err; 1388 1389 gve_unreg_xdp_info(priv); 1390 gve_free_xdp_rings(priv); 1391 gve_free_xdp_qpls(priv); 1392 priv->num_xdp_queues = 0; 1393 return 0; 1394 } 1395 1396 static int gve_add_xdp_queues(struct gve_priv *priv) 1397 { 1398 int err; 1399 1400 priv->num_xdp_queues = priv->tx_cfg.num_queues; 1401 1402 err = gve_alloc_xdp_qpls(priv); 1403 if (err) 1404 goto err; 1405 1406 err = gve_alloc_xdp_rings(priv); 1407 if (err) 1408 goto free_xdp_qpls; 1409 1410 err = gve_reg_xdp_info(priv, priv->dev); 1411 if (err) 1412 goto free_xdp_rings; 1413 1414 err = gve_register_xdp_qpls(priv); 1415 if (err) 1416 goto free_xdp_rings; 1417 1418 err = gve_create_xdp_rings(priv); 1419 if (err) 1420 goto free_xdp_rings; 1421 1422 return 0; 1423 1424 free_xdp_rings: 1425 gve_free_xdp_rings(priv); 1426 free_xdp_qpls: 1427 gve_free_xdp_qpls(priv); 1428 err: 1429 priv->num_xdp_queues = 0; 1430 return err; 1431 } 1432 1433 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1434 { 1435 if (!gve_get_napi_enabled(priv)) 1436 return; 1437 1438 if (link_status == netif_carrier_ok(priv->dev)) 1439 return; 1440 1441 if (link_status) { 1442 netdev_info(priv->dev, "Device link is up.\n"); 1443 netif_carrier_on(priv->dev); 1444 } else { 1445 netdev_info(priv->dev, "Device link is down.\n"); 1446 netif_carrier_off(priv->dev); 1447 } 1448 } 1449 1450 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1451 struct netlink_ext_ack *extack) 1452 { 1453 struct bpf_prog *old_prog; 1454 int err = 0; 1455 u32 status; 1456 1457 old_prog = READ_ONCE(priv->xdp_prog); 1458 if (!netif_carrier_ok(priv->dev)) { 1459 WRITE_ONCE(priv->xdp_prog, prog); 1460 if (old_prog) 1461 bpf_prog_put(old_prog); 1462 return 0; 1463 } 1464 1465 gve_turndown(priv); 1466 if (!old_prog && prog) { 1467 // Allocate XDP TX queues if an XDP program is 1468 // being installed 1469 err = gve_add_xdp_queues(priv); 1470 if (err) 1471 goto out; 1472 } else if (old_prog && !prog) { 1473 // Remove XDP TX queues if an XDP program is 1474 // being uninstalled 1475 err = gve_remove_xdp_queues(priv); 1476 if (err) 1477 goto out; 1478 } 1479 WRITE_ONCE(priv->xdp_prog, prog); 1480 if (old_prog) 1481 bpf_prog_put(old_prog); 1482 1483 out: 1484 gve_turnup(priv); 1485 status = ioread32be(&priv->reg_bar0->device_status); 1486 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1487 return err; 1488 } 1489 1490 static int gve_xsk_pool_enable(struct net_device *dev, 1491 struct xsk_buff_pool *pool, 1492 u16 qid) 1493 { 1494 struct gve_priv *priv = netdev_priv(dev); 1495 struct napi_struct *napi; 1496 struct gve_rx_ring *rx; 1497 int tx_qid; 1498 int err; 1499 1500 if (qid >= priv->rx_cfg.num_queues) { 1501 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1502 return -EINVAL; 1503 } 1504 if (xsk_pool_get_rx_frame_size(pool) < 1505 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1506 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1507 return -EINVAL; 1508 } 1509 1510 err = xsk_pool_dma_map(pool, &priv->pdev->dev, 1511 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1512 if (err) 1513 return err; 1514 1515 /* If XDP prog is not installed, return */ 1516 if (!priv->xdp_prog) 1517 return 0; 1518 1519 rx = &priv->rx[qid]; 1520 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1521 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); 1522 if (err) 1523 goto err; 1524 1525 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1526 MEM_TYPE_XSK_BUFF_POOL, NULL); 1527 if (err) 1528 goto err; 1529 1530 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); 1531 rx->xsk_pool = pool; 1532 1533 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1534 priv->tx[tx_qid].xsk_pool = pool; 1535 1536 return 0; 1537 err: 1538 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1539 xdp_rxq_info_unreg(&rx->xsk_rxq); 1540 1541 xsk_pool_dma_unmap(pool, 1542 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1543 return err; 1544 } 1545 1546 static int gve_xsk_pool_disable(struct net_device *dev, 1547 u16 qid) 1548 { 1549 struct gve_priv *priv = netdev_priv(dev); 1550 struct napi_struct *napi_rx; 1551 struct napi_struct *napi_tx; 1552 struct xsk_buff_pool *pool; 1553 int tx_qid; 1554 1555 pool = xsk_get_pool_from_qid(dev, qid); 1556 if (!pool) 1557 return -EINVAL; 1558 if (qid >= priv->rx_cfg.num_queues) 1559 return -EINVAL; 1560 1561 /* If XDP prog is not installed, unmap DMA and return */ 1562 if (!priv->xdp_prog) 1563 goto done; 1564 1565 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1566 if (!netif_running(dev)) { 1567 priv->rx[qid].xsk_pool = NULL; 1568 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1569 priv->tx[tx_qid].xsk_pool = NULL; 1570 goto done; 1571 } 1572 1573 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1574 napi_disable(napi_rx); /* make sure current rx poll is done */ 1575 1576 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1577 napi_disable(napi_tx); /* make sure current tx poll is done */ 1578 1579 priv->rx[qid].xsk_pool = NULL; 1580 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1581 priv->tx[tx_qid].xsk_pool = NULL; 1582 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1583 1584 napi_enable(napi_rx); 1585 if (gve_rx_work_pending(&priv->rx[qid])) 1586 napi_schedule(napi_rx); 1587 1588 napi_enable(napi_tx); 1589 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1590 napi_schedule(napi_tx); 1591 1592 done: 1593 xsk_pool_dma_unmap(pool, 1594 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1595 return 0; 1596 } 1597 1598 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1599 { 1600 struct gve_priv *priv = netdev_priv(dev); 1601 int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id); 1602 1603 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1604 return -EINVAL; 1605 1606 if (flags & XDP_WAKEUP_TX) { 1607 struct gve_tx_ring *tx = &priv->tx[tx_queue_id]; 1608 struct napi_struct *napi = 1609 &priv->ntfy_blocks[tx->ntfy_id].napi; 1610 1611 if (!napi_if_scheduled_mark_missed(napi)) { 1612 /* Call local_bh_enable to trigger SoftIRQ processing */ 1613 local_bh_disable(); 1614 napi_schedule(napi); 1615 local_bh_enable(); 1616 } 1617 1618 tx->xdp_xsk_wakeup++; 1619 } 1620 1621 return 0; 1622 } 1623 1624 static int verify_xdp_configuration(struct net_device *dev) 1625 { 1626 struct gve_priv *priv = netdev_priv(dev); 1627 1628 if (dev->features & NETIF_F_LRO) { 1629 netdev_warn(dev, "XDP is not supported when LRO is on.\n"); 1630 return -EOPNOTSUPP; 1631 } 1632 1633 if (priv->queue_format != GVE_GQI_QPL_FORMAT) { 1634 netdev_warn(dev, "XDP is not supported in mode %d.\n", 1635 priv->queue_format); 1636 return -EOPNOTSUPP; 1637 } 1638 1639 if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) { 1640 netdev_warn(dev, "XDP is not supported for mtu %d.\n", 1641 dev->mtu); 1642 return -EOPNOTSUPP; 1643 } 1644 1645 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || 1646 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { 1647 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d", 1648 priv->rx_cfg.num_queues, 1649 priv->tx_cfg.num_queues, 1650 priv->tx_cfg.max_queues); 1651 return -EINVAL; 1652 } 1653 return 0; 1654 } 1655 1656 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1657 { 1658 struct gve_priv *priv = netdev_priv(dev); 1659 int err; 1660 1661 err = verify_xdp_configuration(dev); 1662 if (err) 1663 return err; 1664 switch (xdp->command) { 1665 case XDP_SETUP_PROG: 1666 return gve_set_xdp(priv, xdp->prog, xdp->extack); 1667 case XDP_SETUP_XSK_POOL: 1668 if (xdp->xsk.pool) 1669 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); 1670 else 1671 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id); 1672 default: 1673 return -EINVAL; 1674 } 1675 } 1676 1677 int gve_adjust_queues(struct gve_priv *priv, 1678 struct gve_queue_config new_rx_config, 1679 struct gve_queue_config new_tx_config) 1680 { 1681 int err; 1682 1683 if (netif_carrier_ok(priv->dev)) { 1684 /* To make this process as simple as possible we teardown the 1685 * device, set the new configuration, and then bring the device 1686 * up again. 1687 */ 1688 err = gve_close(priv->dev); 1689 /* we have already tried to reset in close, 1690 * just fail at this point 1691 */ 1692 if (err) 1693 return err; 1694 priv->tx_cfg = new_tx_config; 1695 priv->rx_cfg = new_rx_config; 1696 1697 err = gve_open(priv->dev); 1698 if (err) 1699 goto err; 1700 1701 return 0; 1702 } 1703 /* Set the config for the next up. */ 1704 priv->tx_cfg = new_tx_config; 1705 priv->rx_cfg = new_rx_config; 1706 1707 return 0; 1708 err: 1709 netif_err(priv, drv, priv->dev, 1710 "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n"); 1711 gve_turndown(priv); 1712 return err; 1713 } 1714 1715 static void gve_turndown(struct gve_priv *priv) 1716 { 1717 int idx; 1718 1719 if (netif_carrier_ok(priv->dev)) 1720 netif_carrier_off(priv->dev); 1721 1722 if (!gve_get_napi_enabled(priv)) 1723 return; 1724 1725 /* Disable napi to prevent more work from coming in */ 1726 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1727 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1728 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1729 1730 napi_disable(&block->napi); 1731 } 1732 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1733 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1734 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1735 1736 napi_disable(&block->napi); 1737 } 1738 1739 /* Stop tx queues */ 1740 netif_tx_disable(priv->dev); 1741 1742 gve_clear_napi_enabled(priv); 1743 gve_clear_report_stats(priv); 1744 } 1745 1746 static void gve_turnup(struct gve_priv *priv) 1747 { 1748 int idx; 1749 1750 /* Start the tx queues */ 1751 netif_tx_start_all_queues(priv->dev); 1752 1753 /* Enable napi and unmask interrupts for all queues */ 1754 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1755 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1756 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1757 1758 napi_enable(&block->napi); 1759 if (gve_is_gqi(priv)) { 1760 iowrite32be(0, gve_irq_doorbell(priv, block)); 1761 } else { 1762 gve_set_itr_coalesce_usecs_dqo(priv, block, 1763 priv->tx_coalesce_usecs); 1764 } 1765 } 1766 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1767 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1768 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1769 1770 napi_enable(&block->napi); 1771 if (gve_is_gqi(priv)) { 1772 iowrite32be(0, gve_irq_doorbell(priv, block)); 1773 } else { 1774 gve_set_itr_coalesce_usecs_dqo(priv, block, 1775 priv->rx_coalesce_usecs); 1776 } 1777 } 1778 1779 gve_set_napi_enabled(priv); 1780 } 1781 1782 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 1783 { 1784 struct gve_notify_block *block; 1785 struct gve_tx_ring *tx = NULL; 1786 struct gve_priv *priv; 1787 u32 last_nic_done; 1788 u32 current_time; 1789 u32 ntfy_idx; 1790 1791 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 1792 priv = netdev_priv(dev); 1793 if (txqueue > priv->tx_cfg.num_queues) 1794 goto reset; 1795 1796 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 1797 if (ntfy_idx >= priv->num_ntfy_blks) 1798 goto reset; 1799 1800 block = &priv->ntfy_blocks[ntfy_idx]; 1801 tx = block->tx; 1802 1803 current_time = jiffies_to_msecs(jiffies); 1804 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 1805 goto reset; 1806 1807 /* Check to see if there are missed completions, which will allow us to 1808 * kick the queue. 1809 */ 1810 last_nic_done = gve_tx_load_event_counter(priv, tx); 1811 if (last_nic_done - tx->done) { 1812 netdev_info(dev, "Kicking queue %d", txqueue); 1813 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 1814 napi_schedule(&block->napi); 1815 tx->last_kick_msec = current_time; 1816 goto out; 1817 } // Else reset. 1818 1819 reset: 1820 gve_schedule_reset(priv); 1821 1822 out: 1823 if (tx) 1824 tx->queue_timeout++; 1825 priv->tx_timeo_cnt++; 1826 } 1827 1828 static int gve_set_features(struct net_device *netdev, 1829 netdev_features_t features) 1830 { 1831 const netdev_features_t orig_features = netdev->features; 1832 struct gve_priv *priv = netdev_priv(netdev); 1833 int err; 1834 1835 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { 1836 netdev->features ^= NETIF_F_LRO; 1837 if (netif_carrier_ok(netdev)) { 1838 /* To make this process as simple as possible we 1839 * teardown the device, set the new configuration, 1840 * and then bring the device up again. 1841 */ 1842 err = gve_close(netdev); 1843 /* We have already tried to reset in close, just fail 1844 * at this point. 1845 */ 1846 if (err) 1847 goto err; 1848 1849 err = gve_open(netdev); 1850 if (err) 1851 goto err; 1852 } 1853 } 1854 1855 return 0; 1856 err: 1857 /* Reverts the change on error. */ 1858 netdev->features = orig_features; 1859 netif_err(priv, drv, netdev, 1860 "Set features failed! !!! DISABLING ALL QUEUES !!!\n"); 1861 return err; 1862 } 1863 1864 static const struct net_device_ops gve_netdev_ops = { 1865 .ndo_start_xmit = gve_start_xmit, 1866 .ndo_open = gve_open, 1867 .ndo_stop = gve_close, 1868 .ndo_get_stats64 = gve_get_stats, 1869 .ndo_tx_timeout = gve_tx_timeout, 1870 .ndo_set_features = gve_set_features, 1871 .ndo_bpf = gve_xdp, 1872 .ndo_xdp_xmit = gve_xdp_xmit, 1873 .ndo_xsk_wakeup = gve_xsk_wakeup, 1874 }; 1875 1876 static void gve_handle_status(struct gve_priv *priv, u32 status) 1877 { 1878 if (GVE_DEVICE_STATUS_RESET_MASK & status) { 1879 dev_info(&priv->pdev->dev, "Device requested reset.\n"); 1880 gve_set_do_reset(priv); 1881 } 1882 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) { 1883 priv->stats_report_trigger_cnt++; 1884 gve_set_do_report_stats(priv); 1885 } 1886 } 1887 1888 static void gve_handle_reset(struct gve_priv *priv) 1889 { 1890 /* A service task will be scheduled at the end of probe to catch any 1891 * resets that need to happen, and we don't want to reset until 1892 * probe is done. 1893 */ 1894 if (gve_get_probe_in_progress(priv)) 1895 return; 1896 1897 if (gve_get_do_reset(priv)) { 1898 rtnl_lock(); 1899 gve_reset(priv, false); 1900 rtnl_unlock(); 1901 } 1902 } 1903 1904 void gve_handle_report_stats(struct gve_priv *priv) 1905 { 1906 struct stats *stats = priv->stats_report->stats; 1907 int idx, stats_idx = 0; 1908 unsigned int start = 0; 1909 u64 tx_bytes; 1910 1911 if (!gve_get_report_stats(priv)) 1912 return; 1913 1914 be64_add_cpu(&priv->stats_report->written_count, 1); 1915 /* tx stats */ 1916 if (priv->tx) { 1917 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1918 u32 last_completion = 0; 1919 u32 tx_frames = 0; 1920 1921 /* DQO doesn't currently support these metrics. */ 1922 if (gve_is_gqi(priv)) { 1923 last_completion = priv->tx[idx].done; 1924 tx_frames = priv->tx[idx].req; 1925 } 1926 1927 do { 1928 start = u64_stats_fetch_begin(&priv->tx[idx].statss); 1929 tx_bytes = priv->tx[idx].bytes_done; 1930 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); 1931 stats[stats_idx++] = (struct stats) { 1932 .stat_name = cpu_to_be32(TX_WAKE_CNT), 1933 .value = cpu_to_be64(priv->tx[idx].wake_queue), 1934 .queue_id = cpu_to_be32(idx), 1935 }; 1936 stats[stats_idx++] = (struct stats) { 1937 .stat_name = cpu_to_be32(TX_STOP_CNT), 1938 .value = cpu_to_be64(priv->tx[idx].stop_queue), 1939 .queue_id = cpu_to_be32(idx), 1940 }; 1941 stats[stats_idx++] = (struct stats) { 1942 .stat_name = cpu_to_be32(TX_FRAMES_SENT), 1943 .value = cpu_to_be64(tx_frames), 1944 .queue_id = cpu_to_be32(idx), 1945 }; 1946 stats[stats_idx++] = (struct stats) { 1947 .stat_name = cpu_to_be32(TX_BYTES_SENT), 1948 .value = cpu_to_be64(tx_bytes), 1949 .queue_id = cpu_to_be32(idx), 1950 }; 1951 stats[stats_idx++] = (struct stats) { 1952 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED), 1953 .value = cpu_to_be64(last_completion), 1954 .queue_id = cpu_to_be32(idx), 1955 }; 1956 stats[stats_idx++] = (struct stats) { 1957 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT), 1958 .value = cpu_to_be64(priv->tx[idx].queue_timeout), 1959 .queue_id = cpu_to_be32(idx), 1960 }; 1961 } 1962 } 1963 /* rx stats */ 1964 if (priv->rx) { 1965 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1966 stats[stats_idx++] = (struct stats) { 1967 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE), 1968 .value = cpu_to_be64(priv->rx[idx].desc.seqno), 1969 .queue_id = cpu_to_be32(idx), 1970 }; 1971 stats[stats_idx++] = (struct stats) { 1972 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), 1973 .value = cpu_to_be64(priv->rx[0].fill_cnt), 1974 .queue_id = cpu_to_be32(idx), 1975 }; 1976 } 1977 } 1978 } 1979 1980 /* Handle NIC status register changes, reset requests and report stats */ 1981 static void gve_service_task(struct work_struct *work) 1982 { 1983 struct gve_priv *priv = container_of(work, struct gve_priv, 1984 service_task); 1985 u32 status = ioread32be(&priv->reg_bar0->device_status); 1986 1987 gve_handle_status(priv, status); 1988 1989 gve_handle_reset(priv); 1990 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1991 } 1992 1993 static void gve_set_netdev_xdp_features(struct gve_priv *priv) 1994 { 1995 if (priv->queue_format == GVE_GQI_QPL_FORMAT) { 1996 priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC; 1997 priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT; 1998 priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; 1999 priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; 2000 } else { 2001 priv->dev->xdp_features = 0; 2002 } 2003 } 2004 2005 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) 2006 { 2007 int num_ntfy; 2008 int err; 2009 2010 /* Set up the adminq */ 2011 err = gve_adminq_alloc(&priv->pdev->dev, priv); 2012 if (err) { 2013 dev_err(&priv->pdev->dev, 2014 "Failed to alloc admin queue: err=%d\n", err); 2015 return err; 2016 } 2017 2018 err = gve_verify_driver_compatibility(priv); 2019 if (err) { 2020 dev_err(&priv->pdev->dev, 2021 "Could not verify driver compatibility: err=%d\n", err); 2022 goto err; 2023 } 2024 2025 if (skip_describe_device) 2026 goto setup_device; 2027 2028 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED; 2029 /* Get the initial information we need from the device */ 2030 err = gve_adminq_describe_device(priv); 2031 if (err) { 2032 dev_err(&priv->pdev->dev, 2033 "Could not get device information: err=%d\n", err); 2034 goto err; 2035 } 2036 priv->dev->mtu = priv->dev->max_mtu; 2037 num_ntfy = pci_msix_vec_count(priv->pdev); 2038 if (num_ntfy <= 0) { 2039 dev_err(&priv->pdev->dev, 2040 "could not count MSI-x vectors: err=%d\n", num_ntfy); 2041 err = num_ntfy; 2042 goto err; 2043 } else if (num_ntfy < GVE_MIN_MSIX) { 2044 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n", 2045 GVE_MIN_MSIX, num_ntfy); 2046 err = -EINVAL; 2047 goto err; 2048 } 2049 2050 priv->num_registered_pages = 0; 2051 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; 2052 /* gvnic has one Notification Block per MSI-x vector, except for the 2053 * management vector 2054 */ 2055 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; 2056 priv->mgmt_msix_idx = priv->num_ntfy_blks; 2057 2058 priv->tx_cfg.max_queues = 2059 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2); 2060 priv->rx_cfg.max_queues = 2061 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2); 2062 2063 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 2064 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 2065 if (priv->default_num_queues > 0) { 2066 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues, 2067 priv->tx_cfg.num_queues); 2068 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, 2069 priv->rx_cfg.num_queues); 2070 } 2071 2072 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n", 2073 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); 2074 dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n", 2075 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); 2076 2077 if (!gve_is_gqi(priv)) { 2078 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO; 2079 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO; 2080 } 2081 2082 setup_device: 2083 gve_set_netdev_xdp_features(priv); 2084 err = gve_setup_device_resources(priv); 2085 if (!err) 2086 return 0; 2087 err: 2088 gve_adminq_free(&priv->pdev->dev, priv); 2089 return err; 2090 } 2091 2092 static void gve_teardown_priv_resources(struct gve_priv *priv) 2093 { 2094 gve_teardown_device_resources(priv); 2095 gve_adminq_free(&priv->pdev->dev, priv); 2096 } 2097 2098 static void gve_trigger_reset(struct gve_priv *priv) 2099 { 2100 /* Reset the device by releasing the AQ */ 2101 gve_adminq_release(priv); 2102 } 2103 2104 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up) 2105 { 2106 gve_trigger_reset(priv); 2107 /* With the reset having already happened, close cannot fail */ 2108 if (was_up) 2109 gve_close(priv->dev); 2110 gve_teardown_priv_resources(priv); 2111 } 2112 2113 static int gve_reset_recovery(struct gve_priv *priv, bool was_up) 2114 { 2115 int err; 2116 2117 err = gve_init_priv(priv, true); 2118 if (err) 2119 goto err; 2120 if (was_up) { 2121 err = gve_open(priv->dev); 2122 if (err) 2123 goto err; 2124 } 2125 return 0; 2126 err: 2127 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n"); 2128 gve_turndown(priv); 2129 return err; 2130 } 2131 2132 int gve_reset(struct gve_priv *priv, bool attempt_teardown) 2133 { 2134 bool was_up = netif_carrier_ok(priv->dev); 2135 int err; 2136 2137 dev_info(&priv->pdev->dev, "Performing reset\n"); 2138 gve_clear_do_reset(priv); 2139 gve_set_reset_in_progress(priv); 2140 /* If we aren't attempting to teardown normally, just go turndown and 2141 * reset right away. 2142 */ 2143 if (!attempt_teardown) { 2144 gve_turndown(priv); 2145 gve_reset_and_teardown(priv, was_up); 2146 } else { 2147 /* Otherwise attempt to close normally */ 2148 if (was_up) { 2149 err = gve_close(priv->dev); 2150 /* If that fails reset as we did above */ 2151 if (err) 2152 gve_reset_and_teardown(priv, was_up); 2153 } 2154 /* Clean up any remaining resources */ 2155 gve_teardown_priv_resources(priv); 2156 } 2157 2158 /* Set it all back up */ 2159 err = gve_reset_recovery(priv, was_up); 2160 gve_clear_reset_in_progress(priv); 2161 priv->reset_cnt++; 2162 priv->interface_up_cnt = 0; 2163 priv->interface_down_cnt = 0; 2164 priv->stats_report_trigger_cnt = 0; 2165 return err; 2166 } 2167 2168 static void gve_write_version(u8 __iomem *driver_version_register) 2169 { 2170 const char *c = gve_version_prefix; 2171 2172 while (*c) { 2173 writeb(*c, driver_version_register); 2174 c++; 2175 } 2176 2177 c = gve_version_str; 2178 while (*c) { 2179 writeb(*c, driver_version_register); 2180 c++; 2181 } 2182 writeb('\n', driver_version_register); 2183 } 2184 2185 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 2186 { 2187 int max_tx_queues, max_rx_queues; 2188 struct net_device *dev; 2189 __be32 __iomem *db_bar; 2190 struct gve_registers __iomem *reg_bar; 2191 struct gve_priv *priv; 2192 int err; 2193 2194 err = pci_enable_device(pdev); 2195 if (err) 2196 return err; 2197 2198 err = pci_request_regions(pdev, "gvnic-cfg"); 2199 if (err) 2200 goto abort_with_enabled; 2201 2202 pci_set_master(pdev); 2203 2204 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 2205 if (err) { 2206 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err); 2207 goto abort_with_pci_region; 2208 } 2209 2210 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0); 2211 if (!reg_bar) { 2212 dev_err(&pdev->dev, "Failed to map pci bar!\n"); 2213 err = -ENOMEM; 2214 goto abort_with_pci_region; 2215 } 2216 2217 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0); 2218 if (!db_bar) { 2219 dev_err(&pdev->dev, "Failed to map doorbell bar!\n"); 2220 err = -ENOMEM; 2221 goto abort_with_reg_bar; 2222 } 2223 2224 gve_write_version(®_bar->driver_version); 2225 /* Get max queues to alloc etherdev */ 2226 max_tx_queues = ioread32be(®_bar->max_tx_queues); 2227 max_rx_queues = ioread32be(®_bar->max_rx_queues); 2228 /* Alloc and setup the netdev and priv */ 2229 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues); 2230 if (!dev) { 2231 dev_err(&pdev->dev, "could not allocate netdev\n"); 2232 err = -ENOMEM; 2233 goto abort_with_db_bar; 2234 } 2235 SET_NETDEV_DEV(dev, &pdev->dev); 2236 pci_set_drvdata(pdev, dev); 2237 dev->ethtool_ops = &gve_ethtool_ops; 2238 dev->netdev_ops = &gve_netdev_ops; 2239 2240 /* Set default and supported features. 2241 * 2242 * Features might be set in other locations as well (such as 2243 * `gve_adminq_describe_device`). 2244 */ 2245 dev->hw_features = NETIF_F_HIGHDMA; 2246 dev->hw_features |= NETIF_F_SG; 2247 dev->hw_features |= NETIF_F_HW_CSUM; 2248 dev->hw_features |= NETIF_F_TSO; 2249 dev->hw_features |= NETIF_F_TSO6; 2250 dev->hw_features |= NETIF_F_TSO_ECN; 2251 dev->hw_features |= NETIF_F_RXCSUM; 2252 dev->hw_features |= NETIF_F_RXHASH; 2253 dev->features = dev->hw_features; 2254 dev->watchdog_timeo = 5 * HZ; 2255 dev->min_mtu = ETH_MIN_MTU; 2256 netif_carrier_off(dev); 2257 2258 priv = netdev_priv(dev); 2259 priv->dev = dev; 2260 priv->pdev = pdev; 2261 priv->msg_enable = DEFAULT_MSG_LEVEL; 2262 priv->reg_bar0 = reg_bar; 2263 priv->db_bar2 = db_bar; 2264 priv->service_task_flags = 0x0; 2265 priv->state_flags = 0x0; 2266 priv->ethtool_flags = 0x0; 2267 2268 gve_set_probe_in_progress(priv); 2269 priv->gve_wq = alloc_ordered_workqueue("gve", 0); 2270 if (!priv->gve_wq) { 2271 dev_err(&pdev->dev, "Could not allocate workqueue"); 2272 err = -ENOMEM; 2273 goto abort_with_netdev; 2274 } 2275 INIT_WORK(&priv->service_task, gve_service_task); 2276 INIT_WORK(&priv->stats_report_task, gve_stats_report_task); 2277 priv->tx_cfg.max_queues = max_tx_queues; 2278 priv->rx_cfg.max_queues = max_rx_queues; 2279 2280 err = gve_init_priv(priv, false); 2281 if (err) 2282 goto abort_with_wq; 2283 2284 err = register_netdev(dev); 2285 if (err) 2286 goto abort_with_gve_init; 2287 2288 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); 2289 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); 2290 gve_clear_probe_in_progress(priv); 2291 queue_work(priv->gve_wq, &priv->service_task); 2292 return 0; 2293 2294 abort_with_gve_init: 2295 gve_teardown_priv_resources(priv); 2296 2297 abort_with_wq: 2298 destroy_workqueue(priv->gve_wq); 2299 2300 abort_with_netdev: 2301 free_netdev(dev); 2302 2303 abort_with_db_bar: 2304 pci_iounmap(pdev, db_bar); 2305 2306 abort_with_reg_bar: 2307 pci_iounmap(pdev, reg_bar); 2308 2309 abort_with_pci_region: 2310 pci_release_regions(pdev); 2311 2312 abort_with_enabled: 2313 pci_disable_device(pdev); 2314 return err; 2315 } 2316 2317 static void gve_remove(struct pci_dev *pdev) 2318 { 2319 struct net_device *netdev = pci_get_drvdata(pdev); 2320 struct gve_priv *priv = netdev_priv(netdev); 2321 __be32 __iomem *db_bar = priv->db_bar2; 2322 void __iomem *reg_bar = priv->reg_bar0; 2323 2324 unregister_netdev(netdev); 2325 gve_teardown_priv_resources(priv); 2326 destroy_workqueue(priv->gve_wq); 2327 free_netdev(netdev); 2328 pci_iounmap(pdev, db_bar); 2329 pci_iounmap(pdev, reg_bar); 2330 pci_release_regions(pdev); 2331 pci_disable_device(pdev); 2332 } 2333 2334 static void gve_shutdown(struct pci_dev *pdev) 2335 { 2336 struct net_device *netdev = pci_get_drvdata(pdev); 2337 struct gve_priv *priv = netdev_priv(netdev); 2338 bool was_up = netif_carrier_ok(priv->dev); 2339 2340 rtnl_lock(); 2341 if (was_up && gve_close(priv->dev)) { 2342 /* If the dev was up, attempt to close, if close fails, reset */ 2343 gve_reset_and_teardown(priv, was_up); 2344 } else { 2345 /* If the dev wasn't up or close worked, finish tearing down */ 2346 gve_teardown_priv_resources(priv); 2347 } 2348 rtnl_unlock(); 2349 } 2350 2351 #ifdef CONFIG_PM 2352 static int gve_suspend(struct pci_dev *pdev, pm_message_t state) 2353 { 2354 struct net_device *netdev = pci_get_drvdata(pdev); 2355 struct gve_priv *priv = netdev_priv(netdev); 2356 bool was_up = netif_carrier_ok(priv->dev); 2357 2358 priv->suspend_cnt++; 2359 rtnl_lock(); 2360 if (was_up && gve_close(priv->dev)) { 2361 /* If the dev was up, attempt to close, if close fails, reset */ 2362 gve_reset_and_teardown(priv, was_up); 2363 } else { 2364 /* If the dev wasn't up or close worked, finish tearing down */ 2365 gve_teardown_priv_resources(priv); 2366 } 2367 priv->up_before_suspend = was_up; 2368 rtnl_unlock(); 2369 return 0; 2370 } 2371 2372 static int gve_resume(struct pci_dev *pdev) 2373 { 2374 struct net_device *netdev = pci_get_drvdata(pdev); 2375 struct gve_priv *priv = netdev_priv(netdev); 2376 int err; 2377 2378 priv->resume_cnt++; 2379 rtnl_lock(); 2380 err = gve_reset_recovery(priv, priv->up_before_suspend); 2381 rtnl_unlock(); 2382 return err; 2383 } 2384 #endif /* CONFIG_PM */ 2385 2386 static const struct pci_device_id gve_id_table[] = { 2387 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) }, 2388 { } 2389 }; 2390 2391 static struct pci_driver gvnic_driver = { 2392 .name = "gvnic", 2393 .id_table = gve_id_table, 2394 .probe = gve_probe, 2395 .remove = gve_remove, 2396 .shutdown = gve_shutdown, 2397 #ifdef CONFIG_PM 2398 .suspend = gve_suspend, 2399 .resume = gve_resume, 2400 #endif 2401 }; 2402 2403 module_pci_driver(gvnic_driver); 2404 2405 MODULE_DEVICE_TABLE(pci, gve_id_table); 2406 MODULE_AUTHOR("Google, Inc."); 2407 MODULE_DESCRIPTION("gVNIC Driver"); 2408 MODULE_LICENSE("Dual MIT/GPL"); 2409 MODULE_VERSION(GVE_VERSION); 2410