// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <net/sch_generic.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"

#define GVE_DEFAULT_RX_COPYBREAK	(256)

#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION		"1.0.0"
#define GVE_VERSION_PREFIX	"GVE-"

const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (gve_is_gqi(priv))
		return gve_tx(skb, dev);
	else
		return gve_tx_dqo(skb, dev);
}

static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	u64 packets, bytes;
	int ring;

	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				packets = priv->rx[ring].rpackets;
				bytes = priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
			s->rx_packets += packets;
			s->rx_bytes += bytes;
		}
	}
	if (priv->tx) {
		for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->tx[ring].statss);
				packets = priv->tx[ring].pkt_done;
				bytes = priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
						       start));
			s->tx_packets += packets;
			s->tx_bytes += bytes;
		}
	}
}

static int gve_alloc_counter_array(struct gve_priv *priv)
{
	priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
	if (!priv->counter_array)
		return -ENOMEM;

	return 0;
}

static void gve_free_counter_array(struct gve_priv *priv)
{
	if (!priv->counter_array)
		return;

	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
	priv->counter_array = NULL;
}

/* NIC requests to report stats */
static void gve_stats_report_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     stats_report_task);
	if (gve_get_do_report_stats(priv)) {
		gve_handle_report_stats(priv);
		gve_clear_do_report_stats(priv);
	}
}

static void gve_stats_report_schedule(struct gve_priv *priv)
{
	if (!gve_get_probe_in_progress(priv) &&
	    !gve_get_reset_in_progress(priv)) {
		gve_set_do_report_stats(priv);
		queue_work(priv->gve_wq, &priv->stats_report_task);
	}
}

static void gve_stats_report_timer(struct timer_list *t)
{
	struct gve_priv *priv = from_timer(priv, t, stats_report_timer);

	mod_timer(&priv->stats_report_timer,
		  round_jiffies(jiffies +
		  msecs_to_jiffies(priv->stats_report_timer_period)));
	gve_stats_report_schedule(priv);
}

static int gve_alloc_stats_report(struct gve_priv *priv)
{
	int tx_stats_num, rx_stats_num;

	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
		       priv->tx_cfg.num_queues;
	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
		       priv->rx_cfg.num_queues;
	priv->stats_report_len = struct_size(priv->stats_report, stats,
					     tx_stats_num + rx_stats_num);
	priv->stats_report =
		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
				   &priv->stats_report_bus, GFP_KERNEL);
	if (!priv->stats_report)
		return -ENOMEM;
	/* Set up timer for the report-stats task */
	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
	return 0;
}

static void gve_free_stats_report(struct gve_priv *priv)
{
	if (!priv->stats_report)
		return;

	del_timer_sync(&priv->stats_report_timer);
	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
			  priv->stats_report, priv->stats_report_bus);
	priv->stats_report = NULL;
}

static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
{
	struct gve_priv *priv = arg;

	queue_work(priv->gve_wq, &priv->service_task);
	return IRQ_HANDLED;
}

static irqreturn_t gve_intr(int irq, void *arg)
{
	struct gve_notify_block *block = arg;
	struct gve_priv *priv = block->priv;

	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}

static irqreturn_t gve_intr_dqo(int irq, void *arg)
{
	struct gve_notify_block *block = arg;

	/* Interrupts are automatically masked */
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}

static int gve_napi_poll(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block;
	__be32 __iomem *irq_doorbell;
	bool reschedule = false;
	struct gve_priv *priv;

	block = container_of(napi, struct gve_notify_block, napi);
	priv = block->priv;

	if (block->tx)
		reschedule |= gve_tx_poll(block, budget);
	if (block->rx)
		reschedule |= gve_rx_poll(block, budget);

	if (reschedule)
		return budget;

	napi_complete(napi);
	irq_doorbell = gve_irq_doorbell(priv, block);
	iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);

	/* Double check we have no extra work.
	 * Ensure unmask synchronizes with checking for work.
	 */
	mb();
	if (block->tx)
		reschedule |= gve_tx_poll(block, -1);
	if (block->rx)
		reschedule |= gve_rx_poll(block, -1);
	if (reschedule && napi_reschedule(napi))
		iowrite32be(GVE_IRQ_MASK, irq_doorbell);

	return 0;
}

static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block =
		container_of(napi, struct gve_notify_block, napi);
	struct gve_priv *priv = block->priv;
	bool reschedule = false;
	int work_done = 0;

	/* Clear PCI MSI-X Pending Bit Array (PBA)
	 *
	 * This bit is set if an interrupt event occurs while the vector is
	 * masked. If this bit is set and we reenable the interrupt, it will
	 * fire again. Since we're just about to poll the queue state, we don't
	 * need it to fire again.
	 *
	 * Under high softirq load, it's possible that the interrupt condition
	 * is triggered twice before we get the chance to process it.
	 */
	gve_write_irq_doorbell_dqo(priv, block,
				   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_CLEAR_PBA_BIT_DQO);

	if (block->tx)
		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);

	if (block->rx) {
		work_done = gve_rx_poll_dqo(block, budget);
		reschedule |= work_done == budget;
	}

	if (reschedule)
		return budget;

	if (likely(napi_complete_done(napi, work_done))) {
		/* Enable interrupts again.
		 *
		 * We don't need to repoll afterwards because HW supports the
		 * PCI MSI-X PBA feature.
		 *
		 * Another interrupt would be triggered if a new event came in
		 * since the last one.
		 */
		gve_write_irq_doorbell_dqo(priv, block,
					   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
	}

	return work_done;
}

static int gve_alloc_notify_blocks(struct gve_priv *priv)
{
	int num_vecs_requested = priv->num_ntfy_blks + 1;
	char *name = priv->dev->name;
	unsigned int active_cpus;
	int vecs_enabled;
	int i, j;
	int err;

	priv->msix_vectors = kvzalloc(num_vecs_requested *
				      sizeof(*priv->msix_vectors), GFP_KERNEL);
	if (!priv->msix_vectors)
		return -ENOMEM;
	for (i = 0; i < num_vecs_requested; i++)
		priv->msix_vectors[i].entry = i;
	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
					     GVE_MIN_MSIX, num_vecs_requested);
	if (vecs_enabled < 0) {
		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
			GVE_MIN_MSIX, vecs_enabled);
		err = vecs_enabled;
		goto abort_with_msix_vectors;
	}
	if (vecs_enabled != num_vecs_requested) {
		/* Exclude the management vector and round down to an even
		 * count so the remaining vectors split evenly between TX
		 * and RX.
		 */
		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
		int vecs_per_type = new_num_ntfy_blks / 2;
		int vecs_left = new_num_ntfy_blks % 2;

		priv->num_ntfy_blks = new_num_ntfy_blks;
		priv->mgmt_msix_idx = priv->num_ntfy_blks;
		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
						vecs_per_type);
		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
						vecs_per_type + vecs_left);
		dev_err(&priv->pdev->dev,
			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
			vecs_enabled, priv->tx_cfg.max_queues,
			priv->rx_cfg.max_queues);
		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	}
	/* Half the notification blocks go to TX and half to RX */
	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());

	/* Setup Management Vector - the last vector */
	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt",
		 name);
	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
	if (err) {
		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
		goto abort_with_msix_enabled;
	}
	priv->ntfy_blocks =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_ntfy_blks *
				   sizeof(*priv->ntfy_blocks),
				   &priv->ntfy_block_bus, GFP_KERNEL);
	if (!priv->ntfy_blocks) {
		err = -ENOMEM;
		goto abort_with_mgmt_vector;
	}
	/* Setup the other blocks - the first n-1 vectors */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d",
			 name, i);
		block->priv = priv;
		err = request_irq(priv->msix_vectors[msix_idx].vector,
				  gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
				  0, block->name, block);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to receive msix vector %d\n", i);
			goto abort_with_some_ntfy_blocks;
		}
		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      get_cpu_mask(i % active_cpus));
	}
	return 0;
abort_with_some_ntfy_blocks:
	for (j = 0; j < i; j++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[j];
		int msix_idx = j;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->ntfy_blocks),
			  priv->ntfy_blocks, priv->ntfy_block_bus);
	priv->ntfy_blocks = NULL;
abort_with_mgmt_vector:
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
abort_with_msix_enabled:
	pci_disable_msix(priv->pdev);
abort_with_msix_vectors:
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
	return err;
}

static void gve_free_notify_blocks(struct gve_priv *priv)
{
	int i;

	if (!priv->msix_vectors)
		return;

	/* Free the irqs */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
	dma_free_coherent(&priv->pdev->dev,
			  priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
			  priv->ntfy_blocks, priv->ntfy_block_bus);
	priv->ntfy_blocks = NULL;
	pci_disable_msix(priv->pdev);
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
}

static int gve_setup_device_resources(struct gve_priv *priv)
{
	int err;

	err = gve_alloc_counter_array(priv);
	if (err)
		return err;
	err = gve_alloc_notify_blocks(priv);
	if (err)
		goto abort_with_counter;
	err = gve_alloc_stats_report(priv);
	if (err)
		goto abort_with_ntfy_blocks;
	err = gve_adminq_configure_device_resources(priv,
						    priv->counter_array_bus,
						    priv->num_event_counters,
						    priv->ntfy_block_bus,
						    priv->num_ntfy_blks);
	if (unlikely(err)) {
		dev_err(&priv->pdev->dev,
			"could not setup device_resources: err=%d\n", err);
		err = -ENXIO;
		goto abort_with_stats_report;
	}

	if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
					       GFP_KERNEL);
		if (!priv->ptype_lut_dqo) {
			err = -ENOMEM;
			goto abort_with_stats_report;
		}
		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to get ptype map: err=%d\n", err);
			goto abort_with_ptype_lut;
		}
	}

	err = gve_adminq_report_stats(priv, priv->stats_report_len,
				      priv->stats_report_bus,
				      GVE_STATS_REPORT_TIMER_PERIOD);
	if (err)
		dev_err(&priv->pdev->dev,
			"Failed to report stats: err=%d\n", err);
	gve_set_device_resources_ok(priv);
	return 0;

abort_with_ptype_lut:
	kvfree(priv->ptype_lut_dqo);
	priv->ptype_lut_dqo = NULL;
abort_with_stats_report:
	gve_free_stats_report(priv);
abort_with_ntfy_blocks:
	gve_free_notify_blocks(priv);
abort_with_counter:
	gve_free_counter_array(priv);

	return err;
}

static void gve_trigger_reset(struct gve_priv *priv);

static void gve_teardown_device_resources(struct gve_priv *priv)
{
	int err;

	/* Tell device its resources are being freed */
	if (gve_get_device_resources_ok(priv)) {
		/* detach the stats report */
		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to detach stats report: err=%d\n", err);
			gve_trigger_reset(priv);
		}
		err = gve_adminq_deconfigure_device_resources(priv);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Could not deconfigure device resources: err=%d\n",
				err);
			gve_trigger_reset(priv);
		}
	}

	kvfree(priv->ptype_lut_dqo);
	priv->ptype_lut_dqo = NULL;

	gve_free_counter_array(priv);
	gve_free_notify_blocks(priv);
	gve_free_stats_report(priv);
	gve_clear_device_resources_ok(priv);
}

static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
			 int (*gve_poll)(struct napi_struct *, int))
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_add(priv->dev, &block->napi, gve_poll,
		       NAPI_POLL_WEIGHT);
}

static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_del(&block->napi);
}

static int gve_register_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int err;
	int i;

	for (i = 0; i < num_qpls; i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}
	return 0;
}

static int gve_unregister_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int err;
	int i;

	for (i = 0; i < num_qpls; i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}
	return 0;
}

static int gve_create_rings(struct gve_priv *priv)
{
	int err;
	int i;

	err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
			  priv->tx_cfg.num_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
		  priv->tx_cfg.num_queues);

	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
			  priv->rx_cfg.num_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
		  priv->rx_cfg.num_queues);

	if (gve_is_gqi(priv)) {
		/* Rx data ring has been prefilled with packet buffers at queue
		 * allocation time.
		 *
		 * Write the doorbell to provide descriptor slots and packet
		 * buffers to the NIC.
		 */
		for (i = 0; i < priv->rx_cfg.num_queues; i++)
			gve_rx_write_doorbell(priv, &priv->rx[i]);
	} else {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			/* Post buffers and ring doorbell. */
			gve_rx_post_buffers_dqo(&priv->rx[i]);
		}
	}

	return 0;
}

static void add_napi_init_sync_stats(struct gve_priv *priv,
				     int (*napi_poll)(struct napi_struct *napi,
						      int budget))
{
	int i;

	/* Add tx napi & init sync stats */
	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->tx[i].statss);
		priv->tx[i].ntfy_id = ntfy_idx;
		gve_add_napi(priv, ntfy_idx, napi_poll);
	}
	/* Add rx napi & init sync stats */
	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->rx[i].statss);
		priv->rx[i].ntfy_id = ntfy_idx;
		gve_add_napi(priv, ntfy_idx, napi_poll);
	}
}

static void gve_tx_free_rings(struct gve_priv *priv)
{
	if (gve_is_gqi(priv)) {
		gve_tx_free_rings_gqi(priv);
	} else {
		gve_tx_free_rings_dqo(priv);
	}
}

static int gve_alloc_rings(struct gve_priv *priv)
{
	int err;

	/* Setup tx rings */
	priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
			    GFP_KERNEL);
	if (!priv->tx)
		return -ENOMEM;

	if (gve_is_gqi(priv))
		err = gve_tx_alloc_rings(priv);
	else
		err = gve_tx_alloc_rings_dqo(priv);
	if (err)
		goto free_tx;

	/* Setup rx rings */
	priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
			    GFP_KERNEL);
	if (!priv->rx) {
		err = -ENOMEM;
		goto free_tx_queue;
	}

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_rings(priv);
	else
		err = gve_rx_alloc_rings_dqo(priv);
	if (err)
		goto free_rx;

	if (gve_is_gqi(priv))
		add_napi_init_sync_stats(priv, gve_napi_poll);
	else
		add_napi_init_sync_stats(priv, gve_napi_poll_dqo);

	return 0;

free_rx:
	kvfree(priv->rx);
	priv->rx = NULL;
free_tx_queue:
	gve_tx_free_rings(priv);
free_tx:
	kvfree(priv->tx);
	priv->tx = NULL;
	return err;
}

static int gve_destroy_rings(struct gve_priv *priv)
{
	int err;

	err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy tx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy rx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
	return 0;
}

static void gve_rx_free_rings(struct gve_priv *priv)
{
	if (gve_is_gqi(priv))
		gve_rx_free_rings_gqi(priv);
	else
		gve_rx_free_rings_dqo(priv);
}

static void gve_free_rings(struct gve_priv *priv)
{
	int ntfy_idx;
	int i;

	if (priv->tx) {
		for (i = 0; i < priv->tx_cfg.num_queues; i++) {
			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_tx_free_rings(priv);
		kvfree(priv->tx);
		priv->tx = NULL;
	}
	if (priv->rx) {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_rx_free_rings(priv);
		kvfree(priv->rx);
		priv->rx = NULL;
	}
}

int gve_alloc_page(struct gve_priv *priv, struct device *dev,
		   struct page **page, dma_addr_t *dma,
		   enum dma_data_direction dir)
{
	*page = alloc_page(GFP_KERNEL);
	if (!*page) {
		priv->page_alloc_fail++;
		return -ENOMEM;
	}
	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
	if (dma_mapping_error(dev, *dma)) {
		priv->dma_mapping_error++;
		put_page(*page);
		return -ENOMEM;
	}
	return 0;
}

static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
				     int pages)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int err;
	int i;

	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
		netif_err(priv, drv, priv->dev,
			  "Reached max number of registered pages %llu > %llu\n",
			  pages + priv->num_registered_pages,
			  priv->max_registered_pages);
		return -EINVAL;
	}

	qpl->id = id;
	qpl->num_entries = 0;
	qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->pages)
		return -ENOMEM;
	qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
				   GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->page_buses)
		return -ENOMEM;

	for (i = 0; i < pages; i++) {
		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
				     &qpl->page_buses[i],
				     gve_qpl_dma_dir(priv, id));
		/* caller handles clean up */
		if (err)
			return -ENOMEM;
		qpl->num_entries++;
	}
	priv->num_registered_pages += pages;

	return 0;
}

void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
		   enum dma_data_direction dir)
{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
	if (page)
		put_page(page);
}

static void gve_free_queue_page_list(struct gve_priv *priv,
				     int id)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int i;

	if (!qpl->pages)
		return;
	if (!qpl->page_buses)
		goto free_pages;

	for (i = 0; i < qpl->num_entries; i++)
		gve_free_page(&priv->pdev->dev, qpl->pages[i],
			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));

	kvfree(qpl->page_buses);
free_pages:
	kvfree(qpl->pages);
	priv->num_registered_pages -= qpl->num_entries;
}

static int gve_alloc_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int i, j;
	int err;

	/* Raw addressing means no QPLs */
	if (priv->queue_format == GVE_GQI_RDA_FORMAT)
		return 0;

	priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
	if (!priv->qpls)
		return -ENOMEM;

	for (i = 0; i < gve_num_tx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->tx_pages_per_qpl);
		if (err)
			goto free_qpls;
	}
	for (; i < num_qpls; i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->rx_data_slot_cnt);
		if (err)
			goto free_qpls;
	}

	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
				     sizeof(unsigned long) * BITS_PER_BYTE;
	priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
					    sizeof(unsigned long), GFP_KERNEL);
	if (!priv->qpl_cfg.qpl_id_map) {
		err = -ENOMEM;
		goto free_qpls;
	}

	return 0;

free_qpls:
	for (j = 0; j <= i; j++)
		gve_free_queue_page_list(priv, j);
	kvfree(priv->qpls);
	return err;
}

static void gve_free_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int i;

	/* Raw addressing means no QPLs */
	if (priv->queue_format == GVE_GQI_RDA_FORMAT)
		return;

	kvfree(priv->qpl_cfg.qpl_id_map);

	for (i = 0; i < num_qpls; i++)
		gve_free_queue_page_list(priv, i);

	kvfree(priv->qpls);
}

/* Use this to schedule a reset when the device is capable of continuing
 * to handle other requests in its current state. If it is not, do a reset
 * in thread instead.
 */
void gve_schedule_reset(struct gve_priv *priv)
{
	gve_set_do_reset(priv);
	queue_work(priv->gve_wq, &priv->service_task);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);

static int gve_open(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	err = gve_alloc_qpls(priv);
	if (err)
		return err;

	err = gve_alloc_rings(priv);
	if (err)
		goto free_qpls;

	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
	if (err)
		goto free_rings;
	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
	if (err)
		goto free_rings;

	err = gve_register_qpls(priv);
	if (err)
		goto reset;

	if (!gve_is_gqi(priv)) {
		/* Hard code this for now. This may be tuned in the future for
		 * performance.
		 */
		priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
	}
	err = gve_create_rings(priv);
	if (err)
		goto reset;

	gve_set_device_rings_ok(priv);

	if (gve_get_report_stats(priv))
		mod_timer(&priv->stats_report_timer,
			  round_jiffies(jiffies +
				msecs_to_jiffies(priv->stats_report_timer_period)));

	gve_turnup(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	priv->interface_up_cnt++;
	return 0;

free_rings:
	gve_free_rings(priv);
free_qpls:
	gve_free_qpls(priv);
	return err;

reset:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	/* if this fails there is nothing we can do so just ignore the return */
	gve_reset_recovery(priv, false);
	/* return the original error */
	return err;
}

static int gve_close(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	netif_carrier_off(dev);
	if (gve_get_device_rings_ok(priv)) {
		gve_turndown(priv);
		err = gve_destroy_rings(priv);
		if (err)
			goto err;
		err = gve_unregister_qpls(priv);
		if (err)
			goto err;
		gve_clear_device_rings_ok(priv);
	}
	del_timer_sync(&priv->stats_report_timer);

	gve_free_rings(priv);
	gve_free_qpls(priv);
	priv->interface_down_cnt++;
	return 0;

err:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	return gve_reset_recovery(priv, false);
}

int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_queue_config new_rx_config,
		      struct gve_queue_config new_tx_config)
{
	int err;

	if (netif_carrier_ok(priv->dev)) {
		/* To make this process as simple as possible we teardown the
		 * device, set the new configuration, and then bring the device
		 * up again.
		 */
		err = gve_close(priv->dev);
		/* we have already tried to reset in close,
		 * just fail at this point
		 */
		if (err)
			return err;
		priv->tx_cfg = new_tx_config;
		priv->rx_cfg = new_rx_config;

		err = gve_open(priv->dev);
		if (err)
			goto err;

		return 0;
	}
	/* Set the config for the next up. */
	priv->tx_cfg = new_tx_config;
	priv->rx_cfg = new_rx_config;

	return 0;
err:
	netif_err(priv, drv, priv->dev,
		  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

static void gve_turndown(struct gve_priv *priv)
{
	int idx;

	if (netif_carrier_ok(priv->dev))
		netif_carrier_off(priv->dev);

	if (!gve_get_napi_enabled(priv))
		return;

	/* Disable napi to prevent more work from coming in */
	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}

	/* Stop tx queues */
	netif_tx_disable(priv->dev);

	gve_clear_napi_enabled(priv);
	gve_clear_report_stats(priv);
}

static void gve_turnup(struct gve_priv *priv)
{
	int idx;

	/* Start the tx queues */
	netif_tx_start_all_queues(priv->dev);

	/* Enable napi and unmask interrupts for all queues */
	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		if (gve_is_gqi(priv)) {
			iowrite32be(0, gve_irq_doorbell(priv, block));
		} else {
			u32 val = gve_set_itr_ratelimit_dqo(GVE_TX_IRQ_RATELIMIT_US_DQO);

			gve_write_irq_doorbell_dqo(priv, block, val);
		}
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		if (gve_is_gqi(priv)) {
			iowrite32be(0, gve_irq_doorbell(priv, block));
		} else {
			u32 val = gve_set_itr_ratelimit_dqo(GVE_RX_IRQ_RATELIMIT_US_DQO);

			gve_write_irq_doorbell_dqo(priv, block, val);
		}
	}

	gve_set_napi_enabled(priv);
}

static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct gve_priv *priv = netdev_priv(dev);

	gve_schedule_reset(priv);
	priv->tx_timeo_cnt++;
}

static int gve_set_features(struct net_device *netdev,
			    netdev_features_t features)
{
	const netdev_features_t orig_features = netdev->features;
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
		netdev->features ^= NETIF_F_LRO;
		if (netif_carrier_ok(netdev)) {
			/* To make this process as simple as possible we
			 * teardown the device, set the new configuration,
			 * and then bring the device up again.
			 */
			err = gve_close(netdev);
			/* We have already tried to reset in close, just fail
			 * at this point.
			 */
			if (err)
				goto err;

			err = gve_open(netdev);
			if (err)
				goto err;
		}
	}

	return 0;
err:
	/* Reverts the change on error. */
	netdev->features = orig_features;
	netif_err(priv, drv, netdev,
		  "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
	return err;
}

static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit		= gve_start_xmit,
	.ndo_open		= gve_open,
	.ndo_stop		= gve_close,
	.ndo_get_stats64	= gve_get_stats,
	.ndo_tx_timeout		= gve_tx_timeout,
	.ndo_set_features	= gve_set_features,
};

static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
		priv->stats_report_trigger_cnt++;
		gve_set_do_report_stats(priv);
	}
}

static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		gve_reset(priv, false);
		rtnl_unlock();
	}
}

void gve_handle_report_stats(struct gve_priv *priv)
{
	struct stats *stats = priv->stats_report->stats;
	int idx, stats_idx = 0;
	unsigned int start = 0;
	u64 tx_bytes;

	if (!gve_get_report_stats(priv))
		return;

	be64_add_cpu(&priv->stats_report->written_count, 1);
	/* tx stats */
	if (priv->tx) {
		for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
			u32 last_completion = 0;
			u32 tx_frames = 0;

			/* DQO doesn't currently support these metrics. */
			if (gve_is_gqi(priv)) {
				last_completion = priv->tx[idx].done;
				tx_frames = priv->tx[idx].req;
			}

			do {
				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
				tx_bytes = priv->tx[idx].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_WAKE_CNT),
				.value = cpu_to_be64(priv->tx[idx].wake_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_STOP_CNT),
				.value = cpu_to_be64(priv->tx[idx].stop_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
				.value = cpu_to_be64(tx_frames),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_BYTES_SENT),
				.value = cpu_to_be64(tx_bytes),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
				.value = cpu_to_be64(last_completion),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
	/* rx stats */
	if (priv->rx) {
		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
				.value = cpu_to_be64(priv->rx[idx].fill_cnt),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
}

static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
{
	if (!gve_get_napi_enabled(priv))
		return;

	if (link_status == netif_carrier_ok(priv->dev))
		return;

	if (link_status) {
		netdev_info(priv->dev, "Device link is up.\n");
		netif_carrier_on(priv->dev);
	} else {
		netdev_info(priv->dev, "Device link is down.\n");
		netif_carrier_off(priv->dev);
	}
}

/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     service_task);
	u32 status = ioread32be(&priv->reg_bar0->device_status);

	gve_handle_status(priv, status);

	gve_handle_reset(priv);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}

static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	if (skip_describe_device)
		goto setup_device;

	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	if (gve_is_gqi(priv) && priv->dev->max_mtu > PAGE_SIZE) {
		priv->dev->max_mtu = PAGE_SIZE;
		err = gve_adminq_set_mtu(priv, priv->dev->mtu);
		if (err) {
			dev_err(&priv->pdev->dev, "Could not set mtu");
			goto err;
		}
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	priv->num_registered_pages = 0;
	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

setup_device:
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}

static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}

static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}

static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_carrier_ok(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}

static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}

static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	err = pci_request_regions(pdev, "gvnic-cfg");
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		err = -ENOMEM;
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;

	/* Set default and supported features.
	 *
	 * Features might be set in other locations as well (such as
	 * `gve_adminq_describe_device`).
	 */
	dev->hw_features = NETIF_F_HIGHDMA;
	dev->hw_features |= NETIF_F_SG;
	dev->hw_features |= NETIF_F_HW_CSUM;
	dev->hw_features |= NETIF_F_TSO;
	dev->hw_features |= NETIF_F_TSO6;
	dev->hw_features |= NETIF_F_TSO_ECN;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_RXHASH;
	dev->features = dev->hw_features;
	dev->watchdog_timeo = 5 * HZ;
	dev->min_mtu = ETH_MIN_MTU;
	netif_carrier_off(dev);

	priv = netdev_priv(dev);
	priv->dev = dev;
	priv->pdev = pdev;
	priv->msg_enable = DEFAULT_MSG_LEVEL;
	priv->reg_bar0 = reg_bar;
	priv->db_bar2 = db_bar;
	priv->service_task_flags = 0x0;
	priv->state_flags = 0x0;
	priv->ethtool_flags = 0x0;

	gve_set_probe_in_progress(priv);
	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue");
		err = -ENOMEM;
		goto abort_with_netdev;
	}
	INIT_WORK(&priv->service_task, gve_service_task);
	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
	priv->tx_cfg.max_queues = max_tx_queues;
	priv->rx_cfg.max_queues = max_rx_queues;

	err = gve_init_priv(priv, false);
	if (err)
		goto abort_with_wq;

	err = register_netdev(dev);
	if (err)
		goto abort_with_gve_init;

	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
	gve_clear_probe_in_progress(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	return 0;

abort_with_gve_init:
	gve_teardown_priv_resources(priv);

abort_with_wq:
	destroy_workqueue(priv->gve_wq);

abort_with_netdev:
	free_netdev(dev);

abort_with_db_bar:
	pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
	pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
	pci_release_regions(pdev);

abort_with_enabled:
	pci_disable_device(pdev);
	return err;
}

static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}

static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gvnic_driver = {
	.name		= "gvnic",
	.id_table	= gve_id_table,
	.probe		= gve_probe,
	.remove		= gve_remove,
};

module_pci_driver(gvnic_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("gVNIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);