1 // SPDX-License-Identifier: (GPL-2.0 OR MIT) 2 /* Google virtual Ethernet (gve) driver 3 * 4 * Copyright (C) 2015-2021 Google, Inc. 5 */ 6 7 #include <linux/cpumask.h> 8 #include <linux/etherdevice.h> 9 #include <linux/interrupt.h> 10 #include <linux/module.h> 11 #include <linux/pci.h> 12 #include <linux/sched.h> 13 #include <linux/timer.h> 14 #include <linux/workqueue.h> 15 #include <linux/utsname.h> 16 #include <linux/version.h> 17 #include <net/sch_generic.h> 18 #include "gve.h" 19 #include "gve_dqo.h" 20 #include "gve_adminq.h" 21 #include "gve_register.h" 22 23 #define GVE_DEFAULT_RX_COPYBREAK (256) 24 25 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK) 26 #define GVE_VERSION "1.0.0" 27 #define GVE_VERSION_PREFIX "GVE-" 28 29 // Minimum amount of time between queue kicks in msec (10 seconds) 30 #define MIN_TX_TIMEOUT_GAP (1000 * 10) 31 32 const char gve_version_str[] = GVE_VERSION; 33 static const char gve_version_prefix[] = GVE_VERSION_PREFIX; 34 35 static int gve_verify_driver_compatibility(struct gve_priv *priv) 36 { 37 int err; 38 struct gve_driver_info *driver_info; 39 dma_addr_t driver_info_bus; 40 41 driver_info = dma_alloc_coherent(&priv->pdev->dev, 42 sizeof(struct gve_driver_info), 43 &driver_info_bus, GFP_KERNEL); 44 if (!driver_info) 45 return -ENOMEM; 46 47 *driver_info = (struct gve_driver_info) { 48 .os_type = 1, /* Linux */ 49 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR), 50 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL), 51 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL), 52 .driver_capability_flags = { 53 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1), 54 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2), 55 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3), 56 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4), 57 }, 58 }; 59 strscpy(driver_info->os_version_str1, utsname()->release, 60 sizeof(driver_info->os_version_str1)); 61 strscpy(driver_info->os_version_str2, utsname()->version, 62 sizeof(driver_info->os_version_str2)); 63 64 err = gve_adminq_verify_driver_compatibility(priv, 65 sizeof(struct gve_driver_info), 66 driver_info_bus); 67 68 /* It's ok if the device doesn't support this */ 69 if (err == -EOPNOTSUPP) 70 err = 0; 71 72 dma_free_coherent(&priv->pdev->dev, 73 sizeof(struct gve_driver_info), 74 driver_info, driver_info_bus); 75 return err; 76 } 77 78 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev) 79 { 80 struct gve_priv *priv = netdev_priv(dev); 81 82 if (gve_is_gqi(priv)) 83 return gve_tx(skb, dev); 84 else 85 return gve_tx_dqo(skb, dev); 86 } 87 88 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) 89 { 90 struct gve_priv *priv = netdev_priv(dev); 91 unsigned int start; 92 u64 packets, bytes; 93 int ring; 94 95 if (priv->rx) { 96 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { 97 do { 98 start = 99 u64_stats_fetch_begin(&priv->rx[ring].statss); 100 packets = priv->rx[ring].rpackets; 101 bytes = priv->rx[ring].rbytes; 102 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 103 start)); 104 s->rx_packets += packets; 105 s->rx_bytes += bytes; 106 } 107 } 108 if (priv->tx) { 109 for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) { 110 do { 111 start = 112 u64_stats_fetch_begin(&priv->tx[ring].statss); 113 packets = priv->tx[ring].pkt_done; 114 bytes = priv->tx[ring].bytes_done; 115 } while (u64_stats_fetch_retry(&priv->tx[ring].statss, 116 start)); 117 s->tx_packets += packets; 118 s->tx_bytes += bytes; 119 } 120 } 121 } 122 123 static int gve_alloc_counter_array(struct gve_priv *priv) 124 { 125 priv->counter_array = 126 dma_alloc_coherent(&priv->pdev->dev, 127 priv->num_event_counters * 128 sizeof(*priv->counter_array), 129 &priv->counter_array_bus, GFP_KERNEL); 130 if (!priv->counter_array) 131 return -ENOMEM; 132 133 return 0; 134 } 135 136 static void gve_free_counter_array(struct gve_priv *priv) 137 { 138 if (!priv->counter_array) 139 return; 140 141 dma_free_coherent(&priv->pdev->dev, 142 priv->num_event_counters * 143 sizeof(*priv->counter_array), 144 priv->counter_array, priv->counter_array_bus); 145 priv->counter_array = NULL; 146 } 147 148 /* NIC requests to report stats */ 149 static void gve_stats_report_task(struct work_struct *work) 150 { 151 struct gve_priv *priv = container_of(work, struct gve_priv, 152 stats_report_task); 153 if (gve_get_do_report_stats(priv)) { 154 gve_handle_report_stats(priv); 155 gve_clear_do_report_stats(priv); 156 } 157 } 158 159 static void gve_stats_report_schedule(struct gve_priv *priv) 160 { 161 if (!gve_get_probe_in_progress(priv) && 162 !gve_get_reset_in_progress(priv)) { 163 gve_set_do_report_stats(priv); 164 queue_work(priv->gve_wq, &priv->stats_report_task); 165 } 166 } 167 168 static void gve_stats_report_timer(struct timer_list *t) 169 { 170 struct gve_priv *priv = from_timer(priv, t, stats_report_timer); 171 172 mod_timer(&priv->stats_report_timer, 173 round_jiffies(jiffies + 174 msecs_to_jiffies(priv->stats_report_timer_period))); 175 gve_stats_report_schedule(priv); 176 } 177 178 static int gve_alloc_stats_report(struct gve_priv *priv) 179 { 180 int tx_stats_num, rx_stats_num; 181 182 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * 183 priv->tx_cfg.num_queues; 184 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 185 priv->rx_cfg.num_queues; 186 priv->stats_report_len = struct_size(priv->stats_report, stats, 187 tx_stats_num + rx_stats_num); 188 priv->stats_report = 189 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, 190 &priv->stats_report_bus, GFP_KERNEL); 191 if (!priv->stats_report) 192 return -ENOMEM; 193 /* Set up timer for the report-stats task */ 194 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); 195 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; 196 return 0; 197 } 198 199 static void gve_free_stats_report(struct gve_priv *priv) 200 { 201 if (!priv->stats_report) 202 return; 203 204 del_timer_sync(&priv->stats_report_timer); 205 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, 206 priv->stats_report, priv->stats_report_bus); 207 priv->stats_report = NULL; 208 } 209 210 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 211 { 212 struct gve_priv *priv = arg; 213 214 queue_work(priv->gve_wq, &priv->service_task); 215 return IRQ_HANDLED; 216 } 217 218 static irqreturn_t gve_intr(int irq, void *arg) 219 { 220 struct gve_notify_block *block = arg; 221 struct gve_priv *priv = block->priv; 222 223 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 224 napi_schedule_irqoff(&block->napi); 225 return IRQ_HANDLED; 226 } 227 228 static irqreturn_t gve_intr_dqo(int irq, void *arg) 229 { 230 struct gve_notify_block *block = arg; 231 232 /* Interrupts are automatically masked */ 233 napi_schedule_irqoff(&block->napi); 234 return IRQ_HANDLED; 235 } 236 237 static int gve_napi_poll(struct napi_struct *napi, int budget) 238 { 239 struct gve_notify_block *block; 240 __be32 __iomem *irq_doorbell; 241 bool reschedule = false; 242 struct gve_priv *priv; 243 int work_done = 0; 244 245 block = container_of(napi, struct gve_notify_block, napi); 246 priv = block->priv; 247 248 if (block->tx) 249 reschedule |= gve_tx_poll(block, budget); 250 if (block->rx) { 251 work_done = gve_rx_poll(block, budget); 252 reschedule |= work_done == budget; 253 } 254 255 if (reschedule) 256 return budget; 257 258 /* Complete processing - don't unmask irq if busy polling is enabled */ 259 if (likely(napi_complete_done(napi, work_done))) { 260 irq_doorbell = gve_irq_doorbell(priv, block); 261 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); 262 263 /* Ensure IRQ ACK is visible before we check pending work. 264 * If queue had issued updates, it would be truly visible. 265 */ 266 mb(); 267 268 if (block->tx) 269 reschedule |= gve_tx_clean_pending(priv, block->tx); 270 if (block->rx) 271 reschedule |= gve_rx_work_pending(block->rx); 272 273 if (reschedule && napi_reschedule(napi)) 274 iowrite32be(GVE_IRQ_MASK, irq_doorbell); 275 } 276 return work_done; 277 } 278 279 static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) 280 { 281 struct gve_notify_block *block = 282 container_of(napi, struct gve_notify_block, napi); 283 struct gve_priv *priv = block->priv; 284 bool reschedule = false; 285 int work_done = 0; 286 287 /* Clear PCI MSI-X Pending Bit Array (PBA) 288 * 289 * This bit is set if an interrupt event occurs while the vector is 290 * masked. If this bit is set and we reenable the interrupt, it will 291 * fire again. Since we're just about to poll the queue state, we don't 292 * need it to fire again. 293 * 294 * Under high softirq load, it's possible that the interrupt condition 295 * is triggered twice before we got the chance to process it. 296 */ 297 gve_write_irq_doorbell_dqo(priv, block, 298 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_CLEAR_PBA_BIT_DQO); 299 300 if (block->tx) 301 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); 302 303 if (block->rx) { 304 work_done = gve_rx_poll_dqo(block, budget); 305 reschedule |= work_done == budget; 306 } 307 308 if (reschedule) 309 return budget; 310 311 if (likely(napi_complete_done(napi, work_done))) { 312 /* Enable interrupts again. 313 * 314 * We don't need to repoll afterwards because HW supports the 315 * PCI MSI-X PBA feature. 316 * 317 * Another interrupt would be triggered if a new event came in 318 * since the last one. 319 */ 320 gve_write_irq_doorbell_dqo(priv, block, 321 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); 322 } 323 324 return work_done; 325 } 326 327 static int gve_alloc_notify_blocks(struct gve_priv *priv) 328 { 329 int num_vecs_requested = priv->num_ntfy_blks + 1; 330 unsigned int active_cpus; 331 int vecs_enabled; 332 int i, j; 333 int err; 334 335 priv->msix_vectors = kvcalloc(num_vecs_requested, 336 sizeof(*priv->msix_vectors), GFP_KERNEL); 337 if (!priv->msix_vectors) 338 return -ENOMEM; 339 for (i = 0; i < num_vecs_requested; i++) 340 priv->msix_vectors[i].entry = i; 341 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 342 GVE_MIN_MSIX, num_vecs_requested); 343 if (vecs_enabled < 0) { 344 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 345 GVE_MIN_MSIX, vecs_enabled); 346 err = vecs_enabled; 347 goto abort_with_msix_vectors; 348 } 349 if (vecs_enabled != num_vecs_requested) { 350 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 351 int vecs_per_type = new_num_ntfy_blks / 2; 352 int vecs_left = new_num_ntfy_blks % 2; 353 354 priv->num_ntfy_blks = new_num_ntfy_blks; 355 priv->mgmt_msix_idx = priv->num_ntfy_blks; 356 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 357 vecs_per_type); 358 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 359 vecs_per_type + vecs_left); 360 dev_err(&priv->pdev->dev, 361 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 362 vecs_enabled, priv->tx_cfg.max_queues, 363 priv->rx_cfg.max_queues); 364 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 365 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 366 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 367 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 368 } 369 /* Half the notification blocks go to TX and half to RX */ 370 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); 371 372 /* Setup Management Vector - the last vector */ 373 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", 374 pci_name(priv->pdev)); 375 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 376 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 377 if (err) { 378 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 379 goto abort_with_msix_enabled; 380 } 381 priv->irq_db_indices = 382 dma_alloc_coherent(&priv->pdev->dev, 383 priv->num_ntfy_blks * 384 sizeof(*priv->irq_db_indices), 385 &priv->irq_db_indices_bus, GFP_KERNEL); 386 if (!priv->irq_db_indices) { 387 err = -ENOMEM; 388 goto abort_with_mgmt_vector; 389 } 390 391 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * 392 sizeof(*priv->ntfy_blocks), GFP_KERNEL); 393 if (!priv->ntfy_blocks) { 394 err = -ENOMEM; 395 goto abort_with_irq_db_indices; 396 } 397 398 /* Setup the other blocks - the first n-1 vectors */ 399 for (i = 0; i < priv->num_ntfy_blks; i++) { 400 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 401 int msix_idx = i; 402 403 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", 404 i, pci_name(priv->pdev)); 405 block->priv = priv; 406 err = request_irq(priv->msix_vectors[msix_idx].vector, 407 gve_is_gqi(priv) ? gve_intr : gve_intr_dqo, 408 0, block->name, block); 409 if (err) { 410 dev_err(&priv->pdev->dev, 411 "Failed to receive msix vector %d\n", i); 412 goto abort_with_some_ntfy_blocks; 413 } 414 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 415 get_cpu_mask(i % active_cpus)); 416 block->irq_db_index = &priv->irq_db_indices[i].index; 417 } 418 return 0; 419 abort_with_some_ntfy_blocks: 420 for (j = 0; j < i; j++) { 421 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 422 int msix_idx = j; 423 424 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 425 NULL); 426 free_irq(priv->msix_vectors[msix_idx].vector, block); 427 } 428 kvfree(priv->ntfy_blocks); 429 priv->ntfy_blocks = NULL; 430 abort_with_irq_db_indices: 431 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 432 sizeof(*priv->irq_db_indices), 433 priv->irq_db_indices, priv->irq_db_indices_bus); 434 priv->irq_db_indices = NULL; 435 abort_with_mgmt_vector: 436 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 437 abort_with_msix_enabled: 438 pci_disable_msix(priv->pdev); 439 abort_with_msix_vectors: 440 kvfree(priv->msix_vectors); 441 priv->msix_vectors = NULL; 442 return err; 443 } 444 445 static void gve_free_notify_blocks(struct gve_priv *priv) 446 { 447 int i; 448 449 if (!priv->msix_vectors) 450 return; 451 452 /* Free the irqs */ 453 for (i = 0; i < priv->num_ntfy_blks; i++) { 454 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 455 int msix_idx = i; 456 457 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 458 NULL); 459 free_irq(priv->msix_vectors[msix_idx].vector, block); 460 } 461 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 462 kvfree(priv->ntfy_blocks); 463 priv->ntfy_blocks = NULL; 464 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 465 sizeof(*priv->irq_db_indices), 466 priv->irq_db_indices, priv->irq_db_indices_bus); 467 priv->irq_db_indices = NULL; 468 pci_disable_msix(priv->pdev); 469 kvfree(priv->msix_vectors); 470 priv->msix_vectors = NULL; 471 } 472 473 static int gve_setup_device_resources(struct gve_priv *priv) 474 { 475 int err; 476 477 err = gve_alloc_counter_array(priv); 478 if (err) 479 return err; 480 err = gve_alloc_notify_blocks(priv); 481 if (err) 482 goto abort_with_counter; 483 err = gve_alloc_stats_report(priv); 484 if (err) 485 goto abort_with_ntfy_blocks; 486 err = gve_adminq_configure_device_resources(priv, 487 priv->counter_array_bus, 488 priv->num_event_counters, 489 priv->irq_db_indices_bus, 490 priv->num_ntfy_blks); 491 if (unlikely(err)) { 492 dev_err(&priv->pdev->dev, 493 "could not setup device_resources: err=%d\n", err); 494 err = -ENXIO; 495 goto abort_with_stats_report; 496 } 497 498 if (priv->queue_format == GVE_DQO_RDA_FORMAT) { 499 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 500 GFP_KERNEL); 501 if (!priv->ptype_lut_dqo) { 502 err = -ENOMEM; 503 goto abort_with_stats_report; 504 } 505 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); 506 if (err) { 507 dev_err(&priv->pdev->dev, 508 "Failed to get ptype map: err=%d\n", err); 509 goto abort_with_ptype_lut; 510 } 511 } 512 513 err = gve_adminq_report_stats(priv, priv->stats_report_len, 514 priv->stats_report_bus, 515 GVE_STATS_REPORT_TIMER_PERIOD); 516 if (err) 517 dev_err(&priv->pdev->dev, 518 "Failed to report stats: err=%d\n", err); 519 gve_set_device_resources_ok(priv); 520 return 0; 521 522 abort_with_ptype_lut: 523 kvfree(priv->ptype_lut_dqo); 524 priv->ptype_lut_dqo = NULL; 525 abort_with_stats_report: 526 gve_free_stats_report(priv); 527 abort_with_ntfy_blocks: 528 gve_free_notify_blocks(priv); 529 abort_with_counter: 530 gve_free_counter_array(priv); 531 532 return err; 533 } 534 535 static void gve_trigger_reset(struct gve_priv *priv); 536 537 static void gve_teardown_device_resources(struct gve_priv *priv) 538 { 539 int err; 540 541 /* Tell device its resources are being freed */ 542 if (gve_get_device_resources_ok(priv)) { 543 /* detach the stats report */ 544 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 545 if (err) { 546 dev_err(&priv->pdev->dev, 547 "Failed to detach stats report: err=%d\n", err); 548 gve_trigger_reset(priv); 549 } 550 err = gve_adminq_deconfigure_device_resources(priv); 551 if (err) { 552 dev_err(&priv->pdev->dev, 553 "Could not deconfigure device resources: err=%d\n", 554 err); 555 gve_trigger_reset(priv); 556 } 557 } 558 559 kvfree(priv->ptype_lut_dqo); 560 priv->ptype_lut_dqo = NULL; 561 562 gve_free_counter_array(priv); 563 gve_free_notify_blocks(priv); 564 gve_free_stats_report(priv); 565 gve_clear_device_resources_ok(priv); 566 } 567 568 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx, 569 int (*gve_poll)(struct napi_struct *, int)) 570 { 571 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 572 573 netif_napi_add(priv->dev, &block->napi, gve_poll); 574 } 575 576 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) 577 { 578 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 579 580 netif_napi_del(&block->napi); 581 } 582 583 static int gve_register_qpls(struct gve_priv *priv) 584 { 585 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); 586 int err; 587 int i; 588 589 for (i = 0; i < num_qpls; i++) { 590 err = gve_adminq_register_page_list(priv, &priv->qpls[i]); 591 if (err) { 592 netif_err(priv, drv, priv->dev, 593 "failed to register queue page list %d\n", 594 priv->qpls[i].id); 595 /* This failure will trigger a reset - no need to clean 596 * up 597 */ 598 return err; 599 } 600 } 601 return 0; 602 } 603 604 static int gve_unregister_qpls(struct gve_priv *priv) 605 { 606 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); 607 int err; 608 int i; 609 610 for (i = 0; i < num_qpls; i++) { 611 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); 612 /* This failure will trigger a reset - no need to clean up */ 613 if (err) { 614 netif_err(priv, drv, priv->dev, 615 "Failed to unregister queue page list %d\n", 616 priv->qpls[i].id); 617 return err; 618 } 619 } 620 return 0; 621 } 622 623 static int gve_create_rings(struct gve_priv *priv) 624 { 625 int err; 626 int i; 627 628 err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues); 629 if (err) { 630 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 631 priv->tx_cfg.num_queues); 632 /* This failure will trigger a reset - no need to clean 633 * up 634 */ 635 return err; 636 } 637 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 638 priv->tx_cfg.num_queues); 639 640 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 641 if (err) { 642 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", 643 priv->rx_cfg.num_queues); 644 /* This failure will trigger a reset - no need to clean 645 * up 646 */ 647 return err; 648 } 649 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", 650 priv->rx_cfg.num_queues); 651 652 if (gve_is_gqi(priv)) { 653 /* Rx data ring has been prefilled with packet buffers at queue 654 * allocation time. 655 * 656 * Write the doorbell to provide descriptor slots and packet 657 * buffers to the NIC. 658 */ 659 for (i = 0; i < priv->rx_cfg.num_queues; i++) 660 gve_rx_write_doorbell(priv, &priv->rx[i]); 661 } else { 662 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 663 /* Post buffers and ring doorbell. */ 664 gve_rx_post_buffers_dqo(&priv->rx[i]); 665 } 666 } 667 668 return 0; 669 } 670 671 static void add_napi_init_sync_stats(struct gve_priv *priv, 672 int (*napi_poll)(struct napi_struct *napi, 673 int budget)) 674 { 675 int i; 676 677 /* Add tx napi & init sync stats*/ 678 for (i = 0; i < priv->tx_cfg.num_queues; i++) { 679 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 680 681 u64_stats_init(&priv->tx[i].statss); 682 priv->tx[i].ntfy_id = ntfy_idx; 683 gve_add_napi(priv, ntfy_idx, napi_poll); 684 } 685 /* Add rx napi & init sync stats*/ 686 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 687 int ntfy_idx = gve_rx_idx_to_ntfy(priv, i); 688 689 u64_stats_init(&priv->rx[i].statss); 690 priv->rx[i].ntfy_id = ntfy_idx; 691 gve_add_napi(priv, ntfy_idx, napi_poll); 692 } 693 } 694 695 static void gve_tx_free_rings(struct gve_priv *priv) 696 { 697 if (gve_is_gqi(priv)) { 698 gve_tx_free_rings_gqi(priv); 699 } else { 700 gve_tx_free_rings_dqo(priv); 701 } 702 } 703 704 static int gve_alloc_rings(struct gve_priv *priv) 705 { 706 int err; 707 708 /* Setup tx rings */ 709 priv->tx = kvcalloc(priv->tx_cfg.num_queues, sizeof(*priv->tx), 710 GFP_KERNEL); 711 if (!priv->tx) 712 return -ENOMEM; 713 714 if (gve_is_gqi(priv)) 715 err = gve_tx_alloc_rings(priv); 716 else 717 err = gve_tx_alloc_rings_dqo(priv); 718 if (err) 719 goto free_tx; 720 721 /* Setup rx rings */ 722 priv->rx = kvcalloc(priv->rx_cfg.num_queues, sizeof(*priv->rx), 723 GFP_KERNEL); 724 if (!priv->rx) { 725 err = -ENOMEM; 726 goto free_tx_queue; 727 } 728 729 if (gve_is_gqi(priv)) 730 err = gve_rx_alloc_rings(priv); 731 else 732 err = gve_rx_alloc_rings_dqo(priv); 733 if (err) 734 goto free_rx; 735 736 if (gve_is_gqi(priv)) 737 add_napi_init_sync_stats(priv, gve_napi_poll); 738 else 739 add_napi_init_sync_stats(priv, gve_napi_poll_dqo); 740 741 return 0; 742 743 free_rx: 744 kvfree(priv->rx); 745 priv->rx = NULL; 746 free_tx_queue: 747 gve_tx_free_rings(priv); 748 free_tx: 749 kvfree(priv->tx); 750 priv->tx = NULL; 751 return err; 752 } 753 754 static int gve_destroy_rings(struct gve_priv *priv) 755 { 756 int err; 757 758 err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues); 759 if (err) { 760 netif_err(priv, drv, priv->dev, 761 "failed to destroy tx queues\n"); 762 /* This failure will trigger a reset - no need to clean up */ 763 return err; 764 } 765 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); 766 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); 767 if (err) { 768 netif_err(priv, drv, priv->dev, 769 "failed to destroy rx queues\n"); 770 /* This failure will trigger a reset - no need to clean up */ 771 return err; 772 } 773 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); 774 return 0; 775 } 776 777 static void gve_rx_free_rings(struct gve_priv *priv) 778 { 779 if (gve_is_gqi(priv)) 780 gve_rx_free_rings_gqi(priv); 781 else 782 gve_rx_free_rings_dqo(priv); 783 } 784 785 static void gve_free_rings(struct gve_priv *priv) 786 { 787 int ntfy_idx; 788 int i; 789 790 if (priv->tx) { 791 for (i = 0; i < priv->tx_cfg.num_queues; i++) { 792 ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 793 gve_remove_napi(priv, ntfy_idx); 794 } 795 gve_tx_free_rings(priv); 796 kvfree(priv->tx); 797 priv->tx = NULL; 798 } 799 if (priv->rx) { 800 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 801 ntfy_idx = gve_rx_idx_to_ntfy(priv, i); 802 gve_remove_napi(priv, ntfy_idx); 803 } 804 gve_rx_free_rings(priv); 805 kvfree(priv->rx); 806 priv->rx = NULL; 807 } 808 } 809 810 int gve_alloc_page(struct gve_priv *priv, struct device *dev, 811 struct page **page, dma_addr_t *dma, 812 enum dma_data_direction dir, gfp_t gfp_flags) 813 { 814 *page = alloc_page(gfp_flags); 815 if (!*page) { 816 priv->page_alloc_fail++; 817 return -ENOMEM; 818 } 819 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); 820 if (dma_mapping_error(dev, *dma)) { 821 priv->dma_mapping_error++; 822 put_page(*page); 823 return -ENOMEM; 824 } 825 return 0; 826 } 827 828 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id, 829 int pages) 830 { 831 struct gve_queue_page_list *qpl = &priv->qpls[id]; 832 int err; 833 int i; 834 835 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 836 netif_err(priv, drv, priv->dev, 837 "Reached max number of registered pages %llu > %llu\n", 838 pages + priv->num_registered_pages, 839 priv->max_registered_pages); 840 return -EINVAL; 841 } 842 843 qpl->id = id; 844 qpl->num_entries = 0; 845 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); 846 /* caller handles clean up */ 847 if (!qpl->pages) 848 return -ENOMEM; 849 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); 850 /* caller handles clean up */ 851 if (!qpl->page_buses) 852 return -ENOMEM; 853 854 for (i = 0; i < pages; i++) { 855 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], 856 &qpl->page_buses[i], 857 gve_qpl_dma_dir(priv, id), GFP_KERNEL); 858 /* caller handles clean up */ 859 if (err) 860 return -ENOMEM; 861 qpl->num_entries++; 862 } 863 priv->num_registered_pages += pages; 864 865 return 0; 866 } 867 868 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, 869 enum dma_data_direction dir) 870 { 871 if (!dma_mapping_error(dev, dma)) 872 dma_unmap_page(dev, dma, PAGE_SIZE, dir); 873 if (page) 874 put_page(page); 875 } 876 877 static void gve_free_queue_page_list(struct gve_priv *priv, u32 id) 878 { 879 struct gve_queue_page_list *qpl = &priv->qpls[id]; 880 int i; 881 882 if (!qpl->pages) 883 return; 884 if (!qpl->page_buses) 885 goto free_pages; 886 887 for (i = 0; i < qpl->num_entries; i++) 888 gve_free_page(&priv->pdev->dev, qpl->pages[i], 889 qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); 890 891 kvfree(qpl->page_buses); 892 free_pages: 893 kvfree(qpl->pages); 894 priv->num_registered_pages -= qpl->num_entries; 895 } 896 897 static int gve_alloc_qpls(struct gve_priv *priv) 898 { 899 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); 900 int i, j; 901 int err; 902 903 if (num_qpls == 0) 904 return 0; 905 906 priv->qpls = kvcalloc(num_qpls, sizeof(*priv->qpls), GFP_KERNEL); 907 if (!priv->qpls) 908 return -ENOMEM; 909 910 for (i = 0; i < gve_num_tx_qpls(priv); i++) { 911 err = gve_alloc_queue_page_list(priv, i, 912 priv->tx_pages_per_qpl); 913 if (err) 914 goto free_qpls; 915 } 916 for (; i < num_qpls; i++) { 917 err = gve_alloc_queue_page_list(priv, i, 918 priv->rx_data_slot_cnt); 919 if (err) 920 goto free_qpls; 921 } 922 923 priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) * 924 sizeof(unsigned long) * BITS_PER_BYTE; 925 priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(num_qpls), 926 sizeof(unsigned long), GFP_KERNEL); 927 if (!priv->qpl_cfg.qpl_id_map) { 928 err = -ENOMEM; 929 goto free_qpls; 930 } 931 932 return 0; 933 934 free_qpls: 935 for (j = 0; j <= i; j++) 936 gve_free_queue_page_list(priv, j); 937 kvfree(priv->qpls); 938 return err; 939 } 940 941 static void gve_free_qpls(struct gve_priv *priv) 942 { 943 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); 944 int i; 945 946 if (num_qpls == 0) 947 return; 948 949 kvfree(priv->qpl_cfg.qpl_id_map); 950 951 for (i = 0; i < num_qpls; i++) 952 gve_free_queue_page_list(priv, i); 953 954 kvfree(priv->qpls); 955 } 956 957 /* Use this to schedule a reset when the device is capable of continuing 958 * to handle other requests in its current state. If it is not, do a reset 959 * in thread instead. 960 */ 961 void gve_schedule_reset(struct gve_priv *priv) 962 { 963 gve_set_do_reset(priv); 964 queue_work(priv->gve_wq, &priv->service_task); 965 } 966 967 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); 968 static int gve_reset_recovery(struct gve_priv *priv, bool was_up); 969 static void gve_turndown(struct gve_priv *priv); 970 static void gve_turnup(struct gve_priv *priv); 971 972 static int gve_open(struct net_device *dev) 973 { 974 struct gve_priv *priv = netdev_priv(dev); 975 int err; 976 977 err = gve_alloc_qpls(priv); 978 if (err) 979 return err; 980 981 err = gve_alloc_rings(priv); 982 if (err) 983 goto free_qpls; 984 985 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 986 if (err) 987 goto free_rings; 988 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 989 if (err) 990 goto free_rings; 991 992 err = gve_register_qpls(priv); 993 if (err) 994 goto reset; 995 996 if (!gve_is_gqi(priv)) { 997 /* Hard code this for now. This may be tuned in the future for 998 * performance. 999 */ 1000 priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO; 1001 } 1002 err = gve_create_rings(priv); 1003 if (err) 1004 goto reset; 1005 1006 gve_set_device_rings_ok(priv); 1007 1008 if (gve_get_report_stats(priv)) 1009 mod_timer(&priv->stats_report_timer, 1010 round_jiffies(jiffies + 1011 msecs_to_jiffies(priv->stats_report_timer_period))); 1012 1013 gve_turnup(priv); 1014 queue_work(priv->gve_wq, &priv->service_task); 1015 priv->interface_up_cnt++; 1016 return 0; 1017 1018 free_rings: 1019 gve_free_rings(priv); 1020 free_qpls: 1021 gve_free_qpls(priv); 1022 return err; 1023 1024 reset: 1025 /* This must have been called from a reset due to the rtnl lock 1026 * so just return at this point. 1027 */ 1028 if (gve_get_reset_in_progress(priv)) 1029 return err; 1030 /* Otherwise reset before returning */ 1031 gve_reset_and_teardown(priv, true); 1032 /* if this fails there is nothing we can do so just ignore the return */ 1033 gve_reset_recovery(priv, false); 1034 /* return the original error */ 1035 return err; 1036 } 1037 1038 static int gve_close(struct net_device *dev) 1039 { 1040 struct gve_priv *priv = netdev_priv(dev); 1041 int err; 1042 1043 netif_carrier_off(dev); 1044 if (gve_get_device_rings_ok(priv)) { 1045 gve_turndown(priv); 1046 err = gve_destroy_rings(priv); 1047 if (err) 1048 goto err; 1049 err = gve_unregister_qpls(priv); 1050 if (err) 1051 goto err; 1052 gve_clear_device_rings_ok(priv); 1053 } 1054 del_timer_sync(&priv->stats_report_timer); 1055 1056 gve_free_rings(priv); 1057 gve_free_qpls(priv); 1058 priv->interface_down_cnt++; 1059 return 0; 1060 1061 err: 1062 /* This must have been called from a reset due to the rtnl lock 1063 * so just return at this point. 1064 */ 1065 if (gve_get_reset_in_progress(priv)) 1066 return err; 1067 /* Otherwise reset before returning */ 1068 gve_reset_and_teardown(priv, true); 1069 return gve_reset_recovery(priv, false); 1070 } 1071 1072 int gve_adjust_queues(struct gve_priv *priv, 1073 struct gve_queue_config new_rx_config, 1074 struct gve_queue_config new_tx_config) 1075 { 1076 int err; 1077 1078 if (netif_carrier_ok(priv->dev)) { 1079 /* To make this process as simple as possible we teardown the 1080 * device, set the new configuration, and then bring the device 1081 * up again. 1082 */ 1083 err = gve_close(priv->dev); 1084 /* we have already tried to reset in close, 1085 * just fail at this point 1086 */ 1087 if (err) 1088 return err; 1089 priv->tx_cfg = new_tx_config; 1090 priv->rx_cfg = new_rx_config; 1091 1092 err = gve_open(priv->dev); 1093 if (err) 1094 goto err; 1095 1096 return 0; 1097 } 1098 /* Set the config for the next up. */ 1099 priv->tx_cfg = new_tx_config; 1100 priv->rx_cfg = new_rx_config; 1101 1102 return 0; 1103 err: 1104 netif_err(priv, drv, priv->dev, 1105 "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n"); 1106 gve_turndown(priv); 1107 return err; 1108 } 1109 1110 static void gve_turndown(struct gve_priv *priv) 1111 { 1112 int idx; 1113 1114 if (netif_carrier_ok(priv->dev)) 1115 netif_carrier_off(priv->dev); 1116 1117 if (!gve_get_napi_enabled(priv)) 1118 return; 1119 1120 /* Disable napi to prevent more work from coming in */ 1121 for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { 1122 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1123 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1124 1125 napi_disable(&block->napi); 1126 } 1127 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1128 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1129 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1130 1131 napi_disable(&block->napi); 1132 } 1133 1134 /* Stop tx queues */ 1135 netif_tx_disable(priv->dev); 1136 1137 gve_clear_napi_enabled(priv); 1138 gve_clear_report_stats(priv); 1139 } 1140 1141 static void gve_turnup(struct gve_priv *priv) 1142 { 1143 int idx; 1144 1145 /* Start the tx queues */ 1146 netif_tx_start_all_queues(priv->dev); 1147 1148 /* Enable napi and unmask interrupts for all queues */ 1149 for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { 1150 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1151 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1152 1153 napi_enable(&block->napi); 1154 if (gve_is_gqi(priv)) { 1155 iowrite32be(0, gve_irq_doorbell(priv, block)); 1156 } else { 1157 gve_set_itr_coalesce_usecs_dqo(priv, block, 1158 priv->tx_coalesce_usecs); 1159 } 1160 } 1161 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1162 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1163 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1164 1165 napi_enable(&block->napi); 1166 if (gve_is_gqi(priv)) { 1167 iowrite32be(0, gve_irq_doorbell(priv, block)); 1168 } else { 1169 gve_set_itr_coalesce_usecs_dqo(priv, block, 1170 priv->rx_coalesce_usecs); 1171 } 1172 } 1173 1174 gve_set_napi_enabled(priv); 1175 } 1176 1177 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 1178 { 1179 struct gve_notify_block *block; 1180 struct gve_tx_ring *tx = NULL; 1181 struct gve_priv *priv; 1182 u32 last_nic_done; 1183 u32 current_time; 1184 u32 ntfy_idx; 1185 1186 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 1187 priv = netdev_priv(dev); 1188 if (txqueue > priv->tx_cfg.num_queues) 1189 goto reset; 1190 1191 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 1192 if (ntfy_idx >= priv->num_ntfy_blks) 1193 goto reset; 1194 1195 block = &priv->ntfy_blocks[ntfy_idx]; 1196 tx = block->tx; 1197 1198 current_time = jiffies_to_msecs(jiffies); 1199 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 1200 goto reset; 1201 1202 /* Check to see if there are missed completions, which will allow us to 1203 * kick the queue. 1204 */ 1205 last_nic_done = gve_tx_load_event_counter(priv, tx); 1206 if (last_nic_done - tx->done) { 1207 netdev_info(dev, "Kicking queue %d", txqueue); 1208 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 1209 napi_schedule(&block->napi); 1210 tx->last_kick_msec = current_time; 1211 goto out; 1212 } // Else reset. 1213 1214 reset: 1215 gve_schedule_reset(priv); 1216 1217 out: 1218 if (tx) 1219 tx->queue_timeout++; 1220 priv->tx_timeo_cnt++; 1221 } 1222 1223 static int gve_set_features(struct net_device *netdev, 1224 netdev_features_t features) 1225 { 1226 const netdev_features_t orig_features = netdev->features; 1227 struct gve_priv *priv = netdev_priv(netdev); 1228 int err; 1229 1230 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { 1231 netdev->features ^= NETIF_F_LRO; 1232 if (netif_carrier_ok(netdev)) { 1233 /* To make this process as simple as possible we 1234 * teardown the device, set the new configuration, 1235 * and then bring the device up again. 1236 */ 1237 err = gve_close(netdev); 1238 /* We have already tried to reset in close, just fail 1239 * at this point. 1240 */ 1241 if (err) 1242 goto err; 1243 1244 err = gve_open(netdev); 1245 if (err) 1246 goto err; 1247 } 1248 } 1249 1250 return 0; 1251 err: 1252 /* Reverts the change on error. */ 1253 netdev->features = orig_features; 1254 netif_err(priv, drv, netdev, 1255 "Set features failed! !!! DISABLING ALL QUEUES !!!\n"); 1256 return err; 1257 } 1258 1259 static const struct net_device_ops gve_netdev_ops = { 1260 .ndo_start_xmit = gve_start_xmit, 1261 .ndo_open = gve_open, 1262 .ndo_stop = gve_close, 1263 .ndo_get_stats64 = gve_get_stats, 1264 .ndo_tx_timeout = gve_tx_timeout, 1265 .ndo_set_features = gve_set_features, 1266 }; 1267 1268 static void gve_handle_status(struct gve_priv *priv, u32 status) 1269 { 1270 if (GVE_DEVICE_STATUS_RESET_MASK & status) { 1271 dev_info(&priv->pdev->dev, "Device requested reset.\n"); 1272 gve_set_do_reset(priv); 1273 } 1274 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) { 1275 priv->stats_report_trigger_cnt++; 1276 gve_set_do_report_stats(priv); 1277 } 1278 } 1279 1280 static void gve_handle_reset(struct gve_priv *priv) 1281 { 1282 /* A service task will be scheduled at the end of probe to catch any 1283 * resets that need to happen, and we don't want to reset until 1284 * probe is done. 1285 */ 1286 if (gve_get_probe_in_progress(priv)) 1287 return; 1288 1289 if (gve_get_do_reset(priv)) { 1290 rtnl_lock(); 1291 gve_reset(priv, false); 1292 rtnl_unlock(); 1293 } 1294 } 1295 1296 void gve_handle_report_stats(struct gve_priv *priv) 1297 { 1298 struct stats *stats = priv->stats_report->stats; 1299 int idx, stats_idx = 0; 1300 unsigned int start = 0; 1301 u64 tx_bytes; 1302 1303 if (!gve_get_report_stats(priv)) 1304 return; 1305 1306 be64_add_cpu(&priv->stats_report->written_count, 1); 1307 /* tx stats */ 1308 if (priv->tx) { 1309 for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { 1310 u32 last_completion = 0; 1311 u32 tx_frames = 0; 1312 1313 /* DQO doesn't currently support these metrics. */ 1314 if (gve_is_gqi(priv)) { 1315 last_completion = priv->tx[idx].done; 1316 tx_frames = priv->tx[idx].req; 1317 } 1318 1319 do { 1320 start = u64_stats_fetch_begin(&priv->tx[idx].statss); 1321 tx_bytes = priv->tx[idx].bytes_done; 1322 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); 1323 stats[stats_idx++] = (struct stats) { 1324 .stat_name = cpu_to_be32(TX_WAKE_CNT), 1325 .value = cpu_to_be64(priv->tx[idx].wake_queue), 1326 .queue_id = cpu_to_be32(idx), 1327 }; 1328 stats[stats_idx++] = (struct stats) { 1329 .stat_name = cpu_to_be32(TX_STOP_CNT), 1330 .value = cpu_to_be64(priv->tx[idx].stop_queue), 1331 .queue_id = cpu_to_be32(idx), 1332 }; 1333 stats[stats_idx++] = (struct stats) { 1334 .stat_name = cpu_to_be32(TX_FRAMES_SENT), 1335 .value = cpu_to_be64(tx_frames), 1336 .queue_id = cpu_to_be32(idx), 1337 }; 1338 stats[stats_idx++] = (struct stats) { 1339 .stat_name = cpu_to_be32(TX_BYTES_SENT), 1340 .value = cpu_to_be64(tx_bytes), 1341 .queue_id = cpu_to_be32(idx), 1342 }; 1343 stats[stats_idx++] = (struct stats) { 1344 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED), 1345 .value = cpu_to_be64(last_completion), 1346 .queue_id = cpu_to_be32(idx), 1347 }; 1348 stats[stats_idx++] = (struct stats) { 1349 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT), 1350 .value = cpu_to_be64(priv->tx[idx].queue_timeout), 1351 .queue_id = cpu_to_be32(idx), 1352 }; 1353 } 1354 } 1355 /* rx stats */ 1356 if (priv->rx) { 1357 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1358 stats[stats_idx++] = (struct stats) { 1359 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE), 1360 .value = cpu_to_be64(priv->rx[idx].desc.seqno), 1361 .queue_id = cpu_to_be32(idx), 1362 }; 1363 stats[stats_idx++] = (struct stats) { 1364 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), 1365 .value = cpu_to_be64(priv->rx[0].fill_cnt), 1366 .queue_id = cpu_to_be32(idx), 1367 }; 1368 } 1369 } 1370 } 1371 1372 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1373 { 1374 if (!gve_get_napi_enabled(priv)) 1375 return; 1376 1377 if (link_status == netif_carrier_ok(priv->dev)) 1378 return; 1379 1380 if (link_status) { 1381 netdev_info(priv->dev, "Device link is up.\n"); 1382 netif_carrier_on(priv->dev); 1383 } else { 1384 netdev_info(priv->dev, "Device link is down.\n"); 1385 netif_carrier_off(priv->dev); 1386 } 1387 } 1388 1389 /* Handle NIC status register changes, reset requests and report stats */ 1390 static void gve_service_task(struct work_struct *work) 1391 { 1392 struct gve_priv *priv = container_of(work, struct gve_priv, 1393 service_task); 1394 u32 status = ioread32be(&priv->reg_bar0->device_status); 1395 1396 gve_handle_status(priv, status); 1397 1398 gve_handle_reset(priv); 1399 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1400 } 1401 1402 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) 1403 { 1404 int num_ntfy; 1405 int err; 1406 1407 /* Set up the adminq */ 1408 err = gve_adminq_alloc(&priv->pdev->dev, priv); 1409 if (err) { 1410 dev_err(&priv->pdev->dev, 1411 "Failed to alloc admin queue: err=%d\n", err); 1412 return err; 1413 } 1414 1415 err = gve_verify_driver_compatibility(priv); 1416 if (err) { 1417 dev_err(&priv->pdev->dev, 1418 "Could not verify driver compatibility: err=%d\n", err); 1419 goto err; 1420 } 1421 1422 if (skip_describe_device) 1423 goto setup_device; 1424 1425 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED; 1426 /* Get the initial information we need from the device */ 1427 err = gve_adminq_describe_device(priv); 1428 if (err) { 1429 dev_err(&priv->pdev->dev, 1430 "Could not get device information: err=%d\n", err); 1431 goto err; 1432 } 1433 priv->dev->mtu = priv->dev->max_mtu; 1434 num_ntfy = pci_msix_vec_count(priv->pdev); 1435 if (num_ntfy <= 0) { 1436 dev_err(&priv->pdev->dev, 1437 "could not count MSI-x vectors: err=%d\n", num_ntfy); 1438 err = num_ntfy; 1439 goto err; 1440 } else if (num_ntfy < GVE_MIN_MSIX) { 1441 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n", 1442 GVE_MIN_MSIX, num_ntfy); 1443 err = -EINVAL; 1444 goto err; 1445 } 1446 1447 priv->num_registered_pages = 0; 1448 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; 1449 /* gvnic has one Notification Block per MSI-x vector, except for the 1450 * management vector 1451 */ 1452 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; 1453 priv->mgmt_msix_idx = priv->num_ntfy_blks; 1454 1455 priv->tx_cfg.max_queues = 1456 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2); 1457 priv->rx_cfg.max_queues = 1458 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2); 1459 1460 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 1461 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 1462 if (priv->default_num_queues > 0) { 1463 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues, 1464 priv->tx_cfg.num_queues); 1465 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, 1466 priv->rx_cfg.num_queues); 1467 } 1468 1469 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n", 1470 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); 1471 dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n", 1472 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); 1473 1474 if (!gve_is_gqi(priv)) { 1475 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO; 1476 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO; 1477 } 1478 1479 setup_device: 1480 err = gve_setup_device_resources(priv); 1481 if (!err) 1482 return 0; 1483 err: 1484 gve_adminq_free(&priv->pdev->dev, priv); 1485 return err; 1486 } 1487 1488 static void gve_teardown_priv_resources(struct gve_priv *priv) 1489 { 1490 gve_teardown_device_resources(priv); 1491 gve_adminq_free(&priv->pdev->dev, priv); 1492 } 1493 1494 static void gve_trigger_reset(struct gve_priv *priv) 1495 { 1496 /* Reset the device by releasing the AQ */ 1497 gve_adminq_release(priv); 1498 } 1499 1500 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up) 1501 { 1502 gve_trigger_reset(priv); 1503 /* With the reset having already happened, close cannot fail */ 1504 if (was_up) 1505 gve_close(priv->dev); 1506 gve_teardown_priv_resources(priv); 1507 } 1508 1509 static int gve_reset_recovery(struct gve_priv *priv, bool was_up) 1510 { 1511 int err; 1512 1513 err = gve_init_priv(priv, true); 1514 if (err) 1515 goto err; 1516 if (was_up) { 1517 err = gve_open(priv->dev); 1518 if (err) 1519 goto err; 1520 } 1521 return 0; 1522 err: 1523 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n"); 1524 gve_turndown(priv); 1525 return err; 1526 } 1527 1528 int gve_reset(struct gve_priv *priv, bool attempt_teardown) 1529 { 1530 bool was_up = netif_carrier_ok(priv->dev); 1531 int err; 1532 1533 dev_info(&priv->pdev->dev, "Performing reset\n"); 1534 gve_clear_do_reset(priv); 1535 gve_set_reset_in_progress(priv); 1536 /* If we aren't attempting to teardown normally, just go turndown and 1537 * reset right away. 1538 */ 1539 if (!attempt_teardown) { 1540 gve_turndown(priv); 1541 gve_reset_and_teardown(priv, was_up); 1542 } else { 1543 /* Otherwise attempt to close normally */ 1544 if (was_up) { 1545 err = gve_close(priv->dev); 1546 /* If that fails reset as we did above */ 1547 if (err) 1548 gve_reset_and_teardown(priv, was_up); 1549 } 1550 /* Clean up any remaining resources */ 1551 gve_teardown_priv_resources(priv); 1552 } 1553 1554 /* Set it all back up */ 1555 err = gve_reset_recovery(priv, was_up); 1556 gve_clear_reset_in_progress(priv); 1557 priv->reset_cnt++; 1558 priv->interface_up_cnt = 0; 1559 priv->interface_down_cnt = 0; 1560 priv->stats_report_trigger_cnt = 0; 1561 return err; 1562 } 1563 1564 static void gve_write_version(u8 __iomem *driver_version_register) 1565 { 1566 const char *c = gve_version_prefix; 1567 1568 while (*c) { 1569 writeb(*c, driver_version_register); 1570 c++; 1571 } 1572 1573 c = gve_version_str; 1574 while (*c) { 1575 writeb(*c, driver_version_register); 1576 c++; 1577 } 1578 writeb('\n', driver_version_register); 1579 } 1580 1581 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 1582 { 1583 int max_tx_queues, max_rx_queues; 1584 struct net_device *dev; 1585 __be32 __iomem *db_bar; 1586 struct gve_registers __iomem *reg_bar; 1587 struct gve_priv *priv; 1588 int err; 1589 1590 err = pci_enable_device(pdev); 1591 if (err) 1592 return err; 1593 1594 err = pci_request_regions(pdev, "gvnic-cfg"); 1595 if (err) 1596 goto abort_with_enabled; 1597 1598 pci_set_master(pdev); 1599 1600 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 1601 if (err) { 1602 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err); 1603 goto abort_with_pci_region; 1604 } 1605 1606 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0); 1607 if (!reg_bar) { 1608 dev_err(&pdev->dev, "Failed to map pci bar!\n"); 1609 err = -ENOMEM; 1610 goto abort_with_pci_region; 1611 } 1612 1613 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0); 1614 if (!db_bar) { 1615 dev_err(&pdev->dev, "Failed to map doorbell bar!\n"); 1616 err = -ENOMEM; 1617 goto abort_with_reg_bar; 1618 } 1619 1620 gve_write_version(®_bar->driver_version); 1621 /* Get max queues to alloc etherdev */ 1622 max_tx_queues = ioread32be(®_bar->max_tx_queues); 1623 max_rx_queues = ioread32be(®_bar->max_rx_queues); 1624 /* Alloc and setup the netdev and priv */ 1625 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues); 1626 if (!dev) { 1627 dev_err(&pdev->dev, "could not allocate netdev\n"); 1628 err = -ENOMEM; 1629 goto abort_with_db_bar; 1630 } 1631 SET_NETDEV_DEV(dev, &pdev->dev); 1632 pci_set_drvdata(pdev, dev); 1633 dev->ethtool_ops = &gve_ethtool_ops; 1634 dev->netdev_ops = &gve_netdev_ops; 1635 1636 /* Set default and supported features. 1637 * 1638 * Features might be set in other locations as well (such as 1639 * `gve_adminq_describe_device`). 1640 */ 1641 dev->hw_features = NETIF_F_HIGHDMA; 1642 dev->hw_features |= NETIF_F_SG; 1643 dev->hw_features |= NETIF_F_HW_CSUM; 1644 dev->hw_features |= NETIF_F_TSO; 1645 dev->hw_features |= NETIF_F_TSO6; 1646 dev->hw_features |= NETIF_F_TSO_ECN; 1647 dev->hw_features |= NETIF_F_RXCSUM; 1648 dev->hw_features |= NETIF_F_RXHASH; 1649 dev->features = dev->hw_features; 1650 dev->watchdog_timeo = 5 * HZ; 1651 dev->min_mtu = ETH_MIN_MTU; 1652 netif_carrier_off(dev); 1653 1654 priv = netdev_priv(dev); 1655 priv->dev = dev; 1656 priv->pdev = pdev; 1657 priv->msg_enable = DEFAULT_MSG_LEVEL; 1658 priv->reg_bar0 = reg_bar; 1659 priv->db_bar2 = db_bar; 1660 priv->service_task_flags = 0x0; 1661 priv->state_flags = 0x0; 1662 priv->ethtool_flags = 0x0; 1663 1664 gve_set_probe_in_progress(priv); 1665 priv->gve_wq = alloc_ordered_workqueue("gve", 0); 1666 if (!priv->gve_wq) { 1667 dev_err(&pdev->dev, "Could not allocate workqueue"); 1668 err = -ENOMEM; 1669 goto abort_with_netdev; 1670 } 1671 INIT_WORK(&priv->service_task, gve_service_task); 1672 INIT_WORK(&priv->stats_report_task, gve_stats_report_task); 1673 priv->tx_cfg.max_queues = max_tx_queues; 1674 priv->rx_cfg.max_queues = max_rx_queues; 1675 1676 err = gve_init_priv(priv, false); 1677 if (err) 1678 goto abort_with_wq; 1679 1680 err = register_netdev(dev); 1681 if (err) 1682 goto abort_with_gve_init; 1683 1684 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); 1685 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); 1686 gve_clear_probe_in_progress(priv); 1687 queue_work(priv->gve_wq, &priv->service_task); 1688 return 0; 1689 1690 abort_with_gve_init: 1691 gve_teardown_priv_resources(priv); 1692 1693 abort_with_wq: 1694 destroy_workqueue(priv->gve_wq); 1695 1696 abort_with_netdev: 1697 free_netdev(dev); 1698 1699 abort_with_db_bar: 1700 pci_iounmap(pdev, db_bar); 1701 1702 abort_with_reg_bar: 1703 pci_iounmap(pdev, reg_bar); 1704 1705 abort_with_pci_region: 1706 pci_release_regions(pdev); 1707 1708 abort_with_enabled: 1709 pci_disable_device(pdev); 1710 return err; 1711 } 1712 1713 static void gve_remove(struct pci_dev *pdev) 1714 { 1715 struct net_device *netdev = pci_get_drvdata(pdev); 1716 struct gve_priv *priv = netdev_priv(netdev); 1717 __be32 __iomem *db_bar = priv->db_bar2; 1718 void __iomem *reg_bar = priv->reg_bar0; 1719 1720 unregister_netdev(netdev); 1721 gve_teardown_priv_resources(priv); 1722 destroy_workqueue(priv->gve_wq); 1723 free_netdev(netdev); 1724 pci_iounmap(pdev, db_bar); 1725 pci_iounmap(pdev, reg_bar); 1726 pci_release_regions(pdev); 1727 pci_disable_device(pdev); 1728 } 1729 1730 static void gve_shutdown(struct pci_dev *pdev) 1731 { 1732 struct net_device *netdev = pci_get_drvdata(pdev); 1733 struct gve_priv *priv = netdev_priv(netdev); 1734 bool was_up = netif_carrier_ok(priv->dev); 1735 1736 rtnl_lock(); 1737 if (was_up && gve_close(priv->dev)) { 1738 /* If the dev was up, attempt to close, if close fails, reset */ 1739 gve_reset_and_teardown(priv, was_up); 1740 } else { 1741 /* If the dev wasn't up or close worked, finish tearing down */ 1742 gve_teardown_priv_resources(priv); 1743 } 1744 rtnl_unlock(); 1745 } 1746 1747 #ifdef CONFIG_PM 1748 static int gve_suspend(struct pci_dev *pdev, pm_message_t state) 1749 { 1750 struct net_device *netdev = pci_get_drvdata(pdev); 1751 struct gve_priv *priv = netdev_priv(netdev); 1752 bool was_up = netif_carrier_ok(priv->dev); 1753 1754 priv->suspend_cnt++; 1755 rtnl_lock(); 1756 if (was_up && gve_close(priv->dev)) { 1757 /* If the dev was up, attempt to close, if close fails, reset */ 1758 gve_reset_and_teardown(priv, was_up); 1759 } else { 1760 /* If the dev wasn't up or close worked, finish tearing down */ 1761 gve_teardown_priv_resources(priv); 1762 } 1763 priv->up_before_suspend = was_up; 1764 rtnl_unlock(); 1765 return 0; 1766 } 1767 1768 static int gve_resume(struct pci_dev *pdev) 1769 { 1770 struct net_device *netdev = pci_get_drvdata(pdev); 1771 struct gve_priv *priv = netdev_priv(netdev); 1772 int err; 1773 1774 priv->resume_cnt++; 1775 rtnl_lock(); 1776 err = gve_reset_recovery(priv, priv->up_before_suspend); 1777 rtnl_unlock(); 1778 return err; 1779 } 1780 #endif /* CONFIG_PM */ 1781 1782 static const struct pci_device_id gve_id_table[] = { 1783 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) }, 1784 { } 1785 }; 1786 1787 static struct pci_driver gvnic_driver = { 1788 .name = "gvnic", 1789 .id_table = gve_id_table, 1790 .probe = gve_probe, 1791 .remove = gve_remove, 1792 .shutdown = gve_shutdown, 1793 #ifdef CONFIG_PM 1794 .suspend = gve_suspend, 1795 .resume = gve_resume, 1796 #endif 1797 }; 1798 1799 module_pci_driver(gvnic_driver); 1800 1801 MODULE_DEVICE_TABLE(pci, gve_id_table); 1802 MODULE_AUTHOR("Google, Inc."); 1803 MODULE_DESCRIPTION("gVNIC Driver"); 1804 MODULE_LICENSE("Dual MIT/GPL"); 1805 MODULE_VERSION(GVE_VERSION); 1806