1 // SPDX-License-Identifier: (GPL-2.0 OR MIT) 2 /* Google virtual Ethernet (gve) driver 3 * 4 * Copyright (C) 2015-2021 Google, Inc. 5 */ 6 7 #include <linux/bpf.h> 8 #include <linux/cpumask.h> 9 #include <linux/etherdevice.h> 10 #include <linux/filter.h> 11 #include <linux/interrupt.h> 12 #include <linux/module.h> 13 #include <linux/pci.h> 14 #include <linux/sched.h> 15 #include <linux/timer.h> 16 #include <linux/workqueue.h> 17 #include <linux/utsname.h> 18 #include <linux/version.h> 19 #include <net/sch_generic.h> 20 #include <net/xdp_sock_drv.h> 21 #include "gve.h" 22 #include "gve_dqo.h" 23 #include "gve_adminq.h" 24 #include "gve_register.h" 25 26 #define GVE_DEFAULT_RX_COPYBREAK (256) 27 28 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK) 29 #define GVE_VERSION "1.0.0" 30 #define GVE_VERSION_PREFIX "GVE-" 31 32 // Minimum amount of time between queue kicks in msec (10 seconds) 33 #define MIN_TX_TIMEOUT_GAP (1000 * 10) 34 #define DQO_TX_MAX 0x3FFFF 35 36 const char gve_version_str[] = GVE_VERSION; 37 static const char gve_version_prefix[] = GVE_VERSION_PREFIX; 38 39 static int gve_verify_driver_compatibility(struct gve_priv *priv) 40 { 41 int err; 42 struct gve_driver_info *driver_info; 43 dma_addr_t driver_info_bus; 44 45 driver_info = dma_alloc_coherent(&priv->pdev->dev, 46 sizeof(struct gve_driver_info), 47 &driver_info_bus, GFP_KERNEL); 48 if (!driver_info) 49 return -ENOMEM; 50 51 *driver_info = (struct gve_driver_info) { 52 .os_type = 1, /* Linux */ 53 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR), 54 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL), 55 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL), 56 .driver_capability_flags = { 57 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1), 58 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2), 59 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3), 60 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4), 61 }, 62 }; 63 strscpy(driver_info->os_version_str1, utsname()->release, 64 sizeof(driver_info->os_version_str1)); 65 strscpy(driver_info->os_version_str2, utsname()->version, 66 sizeof(driver_info->os_version_str2)); 67 68 err = gve_adminq_verify_driver_compatibility(priv, 69 sizeof(struct gve_driver_info), 70 driver_info_bus); 71 72 /* It's ok if the device doesn't support this */ 73 if (err == -EOPNOTSUPP) 74 err = 0; 75 76 dma_free_coherent(&priv->pdev->dev, 77 sizeof(struct gve_driver_info), 78 driver_info, driver_info_bus); 79 return err; 80 } 81 82 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev) 83 { 84 struct gve_priv *priv = netdev_priv(dev); 85 86 if (gve_is_gqi(priv)) 87 return gve_tx(skb, dev); 88 else 89 return gve_tx_dqo(skb, dev); 90 } 91 92 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) 93 { 94 struct gve_priv *priv = netdev_priv(dev); 95 unsigned int start; 96 u64 packets, bytes; 97 int num_tx_queues; 98 int ring; 99 100 num_tx_queues = gve_num_tx_queues(priv); 101 if (priv->rx) { 102 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { 103 do { 104 start = 105 u64_stats_fetch_begin(&priv->rx[ring].statss); 106 packets = priv->rx[ring].rpackets; 107 bytes = priv->rx[ring].rbytes; 108 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 109 start)); 110 s->rx_packets += packets; 111 s->rx_bytes += bytes; 112 } 113 } 114 if (priv->tx) { 115 for (ring = 0; ring < num_tx_queues; ring++) { 116 do { 117 start = 118 u64_stats_fetch_begin(&priv->tx[ring].statss); 119 packets = priv->tx[ring].pkt_done; 120 bytes = priv->tx[ring].bytes_done; 121 } while (u64_stats_fetch_retry(&priv->tx[ring].statss, 122 start)); 123 s->tx_packets += packets; 124 s->tx_bytes += bytes; 125 } 126 } 127 } 128 129 static int gve_alloc_counter_array(struct gve_priv *priv) 130 { 131 priv->counter_array = 132 dma_alloc_coherent(&priv->pdev->dev, 133 priv->num_event_counters * 134 sizeof(*priv->counter_array), 135 &priv->counter_array_bus, GFP_KERNEL); 136 if (!priv->counter_array) 137 return -ENOMEM; 138 139 return 0; 140 } 141 142 static void gve_free_counter_array(struct gve_priv *priv) 143 { 144 if (!priv->counter_array) 145 return; 146 147 dma_free_coherent(&priv->pdev->dev, 148 priv->num_event_counters * 149 sizeof(*priv->counter_array), 150 priv->counter_array, priv->counter_array_bus); 151 priv->counter_array = NULL; 152 } 153 154 /* NIC requests to report stats */ 155 static void gve_stats_report_task(struct work_struct *work) 156 { 157 struct gve_priv *priv = container_of(work, struct gve_priv, 158 stats_report_task); 159 if (gve_get_do_report_stats(priv)) { 160 gve_handle_report_stats(priv); 161 gve_clear_do_report_stats(priv); 162 } 163 } 164 165 static void gve_stats_report_schedule(struct gve_priv *priv) 166 { 167 if (!gve_get_probe_in_progress(priv) && 168 !gve_get_reset_in_progress(priv)) { 169 gve_set_do_report_stats(priv); 170 queue_work(priv->gve_wq, &priv->stats_report_task); 171 } 172 } 173 174 static void gve_stats_report_timer(struct timer_list *t) 175 { 176 struct gve_priv *priv = from_timer(priv, t, stats_report_timer); 177 178 mod_timer(&priv->stats_report_timer, 179 round_jiffies(jiffies + 180 msecs_to_jiffies(priv->stats_report_timer_period))); 181 gve_stats_report_schedule(priv); 182 } 183 184 static int gve_alloc_stats_report(struct gve_priv *priv) 185 { 186 int tx_stats_num, rx_stats_num; 187 188 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * 189 gve_num_tx_queues(priv); 190 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 191 priv->rx_cfg.num_queues; 192 priv->stats_report_len = struct_size(priv->stats_report, stats, 193 tx_stats_num + rx_stats_num); 194 priv->stats_report = 195 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, 196 &priv->stats_report_bus, GFP_KERNEL); 197 if (!priv->stats_report) 198 return -ENOMEM; 199 /* Set up timer for the report-stats task */ 200 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); 201 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; 202 return 0; 203 } 204 205 static void gve_free_stats_report(struct gve_priv *priv) 206 { 207 if (!priv->stats_report) 208 return; 209 210 del_timer_sync(&priv->stats_report_timer); 211 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, 212 priv->stats_report, priv->stats_report_bus); 213 priv->stats_report = NULL; 214 } 215 216 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 217 { 218 struct gve_priv *priv = arg; 219 220 queue_work(priv->gve_wq, &priv->service_task); 221 return IRQ_HANDLED; 222 } 223 224 static irqreturn_t gve_intr(int irq, void *arg) 225 { 226 struct gve_notify_block *block = arg; 227 struct gve_priv *priv = block->priv; 228 229 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 230 napi_schedule_irqoff(&block->napi); 231 return IRQ_HANDLED; 232 } 233 234 static irqreturn_t gve_intr_dqo(int irq, void *arg) 235 { 236 struct gve_notify_block *block = arg; 237 238 /* Interrupts are automatically masked */ 239 napi_schedule_irqoff(&block->napi); 240 return IRQ_HANDLED; 241 } 242 243 static int gve_napi_poll(struct napi_struct *napi, int budget) 244 { 245 struct gve_notify_block *block; 246 __be32 __iomem *irq_doorbell; 247 bool reschedule = false; 248 struct gve_priv *priv; 249 int work_done = 0; 250 251 block = container_of(napi, struct gve_notify_block, napi); 252 priv = block->priv; 253 254 if (block->tx) { 255 if (block->tx->q_num < priv->tx_cfg.num_queues) 256 reschedule |= gve_tx_poll(block, budget); 257 else 258 reschedule |= gve_xdp_poll(block, budget); 259 } 260 261 if (block->rx) { 262 work_done = gve_rx_poll(block, budget); 263 reschedule |= work_done == budget; 264 } 265 266 if (reschedule) 267 return budget; 268 269 /* Complete processing - don't unmask irq if busy polling is enabled */ 270 if (likely(napi_complete_done(napi, work_done))) { 271 irq_doorbell = gve_irq_doorbell(priv, block); 272 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); 273 274 /* Ensure IRQ ACK is visible before we check pending work. 275 * If queue had issued updates, it would be truly visible. 276 */ 277 mb(); 278 279 if (block->tx) 280 reschedule |= gve_tx_clean_pending(priv, block->tx); 281 if (block->rx) 282 reschedule |= gve_rx_work_pending(block->rx); 283 284 if (reschedule && napi_reschedule(napi)) 285 iowrite32be(GVE_IRQ_MASK, irq_doorbell); 286 } 287 return work_done; 288 } 289 290 static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) 291 { 292 struct gve_notify_block *block = 293 container_of(napi, struct gve_notify_block, napi); 294 struct gve_priv *priv = block->priv; 295 bool reschedule = false; 296 int work_done = 0; 297 298 if (block->tx) 299 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); 300 301 if (block->rx) { 302 work_done = gve_rx_poll_dqo(block, budget); 303 reschedule |= work_done == budget; 304 } 305 306 if (reschedule) 307 return budget; 308 309 if (likely(napi_complete_done(napi, work_done))) { 310 /* Enable interrupts again. 311 * 312 * We don't need to repoll afterwards because HW supports the 313 * PCI MSI-X PBA feature. 314 * 315 * Another interrupt would be triggered if a new event came in 316 * since the last one. 317 */ 318 gve_write_irq_doorbell_dqo(priv, block, 319 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); 320 } 321 322 return work_done; 323 } 324 325 static int gve_alloc_notify_blocks(struct gve_priv *priv) 326 { 327 int num_vecs_requested = priv->num_ntfy_blks + 1; 328 unsigned int active_cpus; 329 int vecs_enabled; 330 int i, j; 331 int err; 332 333 priv->msix_vectors = kvcalloc(num_vecs_requested, 334 sizeof(*priv->msix_vectors), GFP_KERNEL); 335 if (!priv->msix_vectors) 336 return -ENOMEM; 337 for (i = 0; i < num_vecs_requested; i++) 338 priv->msix_vectors[i].entry = i; 339 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 340 GVE_MIN_MSIX, num_vecs_requested); 341 if (vecs_enabled < 0) { 342 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 343 GVE_MIN_MSIX, vecs_enabled); 344 err = vecs_enabled; 345 goto abort_with_msix_vectors; 346 } 347 if (vecs_enabled != num_vecs_requested) { 348 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 349 int vecs_per_type = new_num_ntfy_blks / 2; 350 int vecs_left = new_num_ntfy_blks % 2; 351 352 priv->num_ntfy_blks = new_num_ntfy_blks; 353 priv->mgmt_msix_idx = priv->num_ntfy_blks; 354 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 355 vecs_per_type); 356 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 357 vecs_per_type + vecs_left); 358 dev_err(&priv->pdev->dev, 359 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 360 vecs_enabled, priv->tx_cfg.max_queues, 361 priv->rx_cfg.max_queues); 362 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 363 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 364 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 365 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 366 } 367 /* Half the notification blocks go to TX and half to RX */ 368 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); 369 370 /* Setup Management Vector - the last vector */ 371 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", 372 pci_name(priv->pdev)); 373 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 374 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 375 if (err) { 376 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 377 goto abort_with_msix_enabled; 378 } 379 priv->irq_db_indices = 380 dma_alloc_coherent(&priv->pdev->dev, 381 priv->num_ntfy_blks * 382 sizeof(*priv->irq_db_indices), 383 &priv->irq_db_indices_bus, GFP_KERNEL); 384 if (!priv->irq_db_indices) { 385 err = -ENOMEM; 386 goto abort_with_mgmt_vector; 387 } 388 389 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * 390 sizeof(*priv->ntfy_blocks), GFP_KERNEL); 391 if (!priv->ntfy_blocks) { 392 err = -ENOMEM; 393 goto abort_with_irq_db_indices; 394 } 395 396 /* Setup the other blocks - the first n-1 vectors */ 397 for (i = 0; i < priv->num_ntfy_blks; i++) { 398 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 399 int msix_idx = i; 400 401 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", 402 i, pci_name(priv->pdev)); 403 block->priv = priv; 404 err = request_irq(priv->msix_vectors[msix_idx].vector, 405 gve_is_gqi(priv) ? gve_intr : gve_intr_dqo, 406 0, block->name, block); 407 if (err) { 408 dev_err(&priv->pdev->dev, 409 "Failed to receive msix vector %d\n", i); 410 goto abort_with_some_ntfy_blocks; 411 } 412 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 413 get_cpu_mask(i % active_cpus)); 414 block->irq_db_index = &priv->irq_db_indices[i].index; 415 } 416 return 0; 417 abort_with_some_ntfy_blocks: 418 for (j = 0; j < i; j++) { 419 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 420 int msix_idx = j; 421 422 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 423 NULL); 424 free_irq(priv->msix_vectors[msix_idx].vector, block); 425 } 426 kvfree(priv->ntfy_blocks); 427 priv->ntfy_blocks = NULL; 428 abort_with_irq_db_indices: 429 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 430 sizeof(*priv->irq_db_indices), 431 priv->irq_db_indices, priv->irq_db_indices_bus); 432 priv->irq_db_indices = NULL; 433 abort_with_mgmt_vector: 434 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 435 abort_with_msix_enabled: 436 pci_disable_msix(priv->pdev); 437 abort_with_msix_vectors: 438 kvfree(priv->msix_vectors); 439 priv->msix_vectors = NULL; 440 return err; 441 } 442 443 static void gve_free_notify_blocks(struct gve_priv *priv) 444 { 445 int i; 446 447 if (!priv->msix_vectors) 448 return; 449 450 /* Free the irqs */ 451 for (i = 0; i < priv->num_ntfy_blks; i++) { 452 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 453 int msix_idx = i; 454 455 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 456 NULL); 457 free_irq(priv->msix_vectors[msix_idx].vector, block); 458 } 459 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 460 kvfree(priv->ntfy_blocks); 461 priv->ntfy_blocks = NULL; 462 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 463 sizeof(*priv->irq_db_indices), 464 priv->irq_db_indices, priv->irq_db_indices_bus); 465 priv->irq_db_indices = NULL; 466 pci_disable_msix(priv->pdev); 467 kvfree(priv->msix_vectors); 468 priv->msix_vectors = NULL; 469 } 470 471 static int gve_setup_device_resources(struct gve_priv *priv) 472 { 473 int err; 474 475 err = gve_alloc_counter_array(priv); 476 if (err) 477 return err; 478 err = gve_alloc_notify_blocks(priv); 479 if (err) 480 goto abort_with_counter; 481 err = gve_alloc_stats_report(priv); 482 if (err) 483 goto abort_with_ntfy_blocks; 484 err = gve_adminq_configure_device_resources(priv, 485 priv->counter_array_bus, 486 priv->num_event_counters, 487 priv->irq_db_indices_bus, 488 priv->num_ntfy_blks); 489 if (unlikely(err)) { 490 dev_err(&priv->pdev->dev, 491 "could not setup device_resources: err=%d\n", err); 492 err = -ENXIO; 493 goto abort_with_stats_report; 494 } 495 496 if (priv->queue_format == GVE_DQO_RDA_FORMAT) { 497 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 498 GFP_KERNEL); 499 if (!priv->ptype_lut_dqo) { 500 err = -ENOMEM; 501 goto abort_with_stats_report; 502 } 503 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); 504 if (err) { 505 dev_err(&priv->pdev->dev, 506 "Failed to get ptype map: err=%d\n", err); 507 goto abort_with_ptype_lut; 508 } 509 } 510 511 err = gve_adminq_report_stats(priv, priv->stats_report_len, 512 priv->stats_report_bus, 513 GVE_STATS_REPORT_TIMER_PERIOD); 514 if (err) 515 dev_err(&priv->pdev->dev, 516 "Failed to report stats: err=%d\n", err); 517 gve_set_device_resources_ok(priv); 518 return 0; 519 520 abort_with_ptype_lut: 521 kvfree(priv->ptype_lut_dqo); 522 priv->ptype_lut_dqo = NULL; 523 abort_with_stats_report: 524 gve_free_stats_report(priv); 525 abort_with_ntfy_blocks: 526 gve_free_notify_blocks(priv); 527 abort_with_counter: 528 gve_free_counter_array(priv); 529 530 return err; 531 } 532 533 static void gve_trigger_reset(struct gve_priv *priv); 534 535 static void gve_teardown_device_resources(struct gve_priv *priv) 536 { 537 int err; 538 539 /* Tell device its resources are being freed */ 540 if (gve_get_device_resources_ok(priv)) { 541 /* detach the stats report */ 542 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 543 if (err) { 544 dev_err(&priv->pdev->dev, 545 "Failed to detach stats report: err=%d\n", err); 546 gve_trigger_reset(priv); 547 } 548 err = gve_adminq_deconfigure_device_resources(priv); 549 if (err) { 550 dev_err(&priv->pdev->dev, 551 "Could not deconfigure device resources: err=%d\n", 552 err); 553 gve_trigger_reset(priv); 554 } 555 } 556 557 kvfree(priv->ptype_lut_dqo); 558 priv->ptype_lut_dqo = NULL; 559 560 gve_free_counter_array(priv); 561 gve_free_notify_blocks(priv); 562 gve_free_stats_report(priv); 563 gve_clear_device_resources_ok(priv); 564 } 565 566 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx, 567 int (*gve_poll)(struct napi_struct *, int)) 568 { 569 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 570 571 netif_napi_add(priv->dev, &block->napi, gve_poll); 572 } 573 574 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) 575 { 576 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 577 578 netif_napi_del(&block->napi); 579 } 580 581 static int gve_register_xdp_qpls(struct gve_priv *priv) 582 { 583 int start_id; 584 int err; 585 int i; 586 587 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); 588 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 589 err = gve_adminq_register_page_list(priv, &priv->qpls[i]); 590 if (err) { 591 netif_err(priv, drv, priv->dev, 592 "failed to register queue page list %d\n", 593 priv->qpls[i].id); 594 /* This failure will trigger a reset - no need to clean 595 * up 596 */ 597 return err; 598 } 599 } 600 return 0; 601 } 602 603 static int gve_register_qpls(struct gve_priv *priv) 604 { 605 int start_id; 606 int err; 607 int i; 608 609 start_id = gve_tx_start_qpl_id(priv); 610 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { 611 err = gve_adminq_register_page_list(priv, &priv->qpls[i]); 612 if (err) { 613 netif_err(priv, drv, priv->dev, 614 "failed to register queue page list %d\n", 615 priv->qpls[i].id); 616 /* This failure will trigger a reset - no need to clean 617 * up 618 */ 619 return err; 620 } 621 } 622 623 start_id = gve_rx_start_qpl_id(priv); 624 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { 625 err = gve_adminq_register_page_list(priv, &priv->qpls[i]); 626 if (err) { 627 netif_err(priv, drv, priv->dev, 628 "failed to register queue page list %d\n", 629 priv->qpls[i].id); 630 /* This failure will trigger a reset - no need to clean 631 * up 632 */ 633 return err; 634 } 635 } 636 return 0; 637 } 638 639 static int gve_unregister_xdp_qpls(struct gve_priv *priv) 640 { 641 int start_id; 642 int err; 643 int i; 644 645 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); 646 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 647 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); 648 /* This failure will trigger a reset - no need to clean up */ 649 if (err) { 650 netif_err(priv, drv, priv->dev, 651 "Failed to unregister queue page list %d\n", 652 priv->qpls[i].id); 653 return err; 654 } 655 } 656 return 0; 657 } 658 659 static int gve_unregister_qpls(struct gve_priv *priv) 660 { 661 int start_id; 662 int err; 663 int i; 664 665 start_id = gve_tx_start_qpl_id(priv); 666 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { 667 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); 668 /* This failure will trigger a reset - no need to clean up */ 669 if (err) { 670 netif_err(priv, drv, priv->dev, 671 "Failed to unregister queue page list %d\n", 672 priv->qpls[i].id); 673 return err; 674 } 675 } 676 677 start_id = gve_rx_start_qpl_id(priv); 678 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { 679 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); 680 /* This failure will trigger a reset - no need to clean up */ 681 if (err) { 682 netif_err(priv, drv, priv->dev, 683 "Failed to unregister queue page list %d\n", 684 priv->qpls[i].id); 685 return err; 686 } 687 } 688 return 0; 689 } 690 691 static int gve_create_xdp_rings(struct gve_priv *priv) 692 { 693 int err; 694 695 err = gve_adminq_create_tx_queues(priv, 696 gve_xdp_tx_start_queue_id(priv), 697 priv->num_xdp_queues); 698 if (err) { 699 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n", 700 priv->num_xdp_queues); 701 /* This failure will trigger a reset - no need to clean 702 * up 703 */ 704 return err; 705 } 706 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n", 707 priv->num_xdp_queues); 708 709 return 0; 710 } 711 712 static int gve_create_rings(struct gve_priv *priv) 713 { 714 int num_tx_queues = gve_num_tx_queues(priv); 715 int err; 716 int i; 717 718 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); 719 if (err) { 720 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 721 num_tx_queues); 722 /* This failure will trigger a reset - no need to clean 723 * up 724 */ 725 return err; 726 } 727 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 728 num_tx_queues); 729 730 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 731 if (err) { 732 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", 733 priv->rx_cfg.num_queues); 734 /* This failure will trigger a reset - no need to clean 735 * up 736 */ 737 return err; 738 } 739 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", 740 priv->rx_cfg.num_queues); 741 742 if (gve_is_gqi(priv)) { 743 /* Rx data ring has been prefilled with packet buffers at queue 744 * allocation time. 745 * 746 * Write the doorbell to provide descriptor slots and packet 747 * buffers to the NIC. 748 */ 749 for (i = 0; i < priv->rx_cfg.num_queues; i++) 750 gve_rx_write_doorbell(priv, &priv->rx[i]); 751 } else { 752 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 753 /* Post buffers and ring doorbell. */ 754 gve_rx_post_buffers_dqo(&priv->rx[i]); 755 } 756 } 757 758 return 0; 759 } 760 761 static void add_napi_init_xdp_sync_stats(struct gve_priv *priv, 762 int (*napi_poll)(struct napi_struct *napi, 763 int budget)) 764 { 765 int start_id = gve_xdp_tx_start_queue_id(priv); 766 int i; 767 768 /* Add xdp tx napi & init sync stats*/ 769 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { 770 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 771 772 u64_stats_init(&priv->tx[i].statss); 773 priv->tx[i].ntfy_id = ntfy_idx; 774 gve_add_napi(priv, ntfy_idx, napi_poll); 775 } 776 } 777 778 static void add_napi_init_sync_stats(struct gve_priv *priv, 779 int (*napi_poll)(struct napi_struct *napi, 780 int budget)) 781 { 782 int i; 783 784 /* Add tx napi & init sync stats*/ 785 for (i = 0; i < gve_num_tx_queues(priv); i++) { 786 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 787 788 u64_stats_init(&priv->tx[i].statss); 789 priv->tx[i].ntfy_id = ntfy_idx; 790 gve_add_napi(priv, ntfy_idx, napi_poll); 791 } 792 /* Add rx napi & init sync stats*/ 793 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 794 int ntfy_idx = gve_rx_idx_to_ntfy(priv, i); 795 796 u64_stats_init(&priv->rx[i].statss); 797 priv->rx[i].ntfy_id = ntfy_idx; 798 gve_add_napi(priv, ntfy_idx, napi_poll); 799 } 800 } 801 802 static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings) 803 { 804 if (gve_is_gqi(priv)) { 805 gve_tx_free_rings_gqi(priv, start_id, num_rings); 806 } else { 807 gve_tx_free_rings_dqo(priv); 808 } 809 } 810 811 static int gve_alloc_xdp_rings(struct gve_priv *priv) 812 { 813 int start_id; 814 int err = 0; 815 816 if (!priv->num_xdp_queues) 817 return 0; 818 819 start_id = gve_xdp_tx_start_queue_id(priv); 820 err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues); 821 if (err) 822 return err; 823 add_napi_init_xdp_sync_stats(priv, gve_napi_poll); 824 825 return 0; 826 } 827 828 static int gve_alloc_rings(struct gve_priv *priv) 829 { 830 int err; 831 832 /* Setup tx rings */ 833 priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx), 834 GFP_KERNEL); 835 if (!priv->tx) 836 return -ENOMEM; 837 838 if (gve_is_gqi(priv)) 839 err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv)); 840 else 841 err = gve_tx_alloc_rings_dqo(priv); 842 if (err) 843 goto free_tx; 844 845 /* Setup rx rings */ 846 priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx), 847 GFP_KERNEL); 848 if (!priv->rx) { 849 err = -ENOMEM; 850 goto free_tx_queue; 851 } 852 853 if (gve_is_gqi(priv)) 854 err = gve_rx_alloc_rings(priv); 855 else 856 err = gve_rx_alloc_rings_dqo(priv); 857 if (err) 858 goto free_rx; 859 860 if (gve_is_gqi(priv)) 861 add_napi_init_sync_stats(priv, gve_napi_poll); 862 else 863 add_napi_init_sync_stats(priv, gve_napi_poll_dqo); 864 865 return 0; 866 867 free_rx: 868 kvfree(priv->rx); 869 priv->rx = NULL; 870 free_tx_queue: 871 gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv)); 872 free_tx: 873 kvfree(priv->tx); 874 priv->tx = NULL; 875 return err; 876 } 877 878 static int gve_destroy_xdp_rings(struct gve_priv *priv) 879 { 880 int start_id; 881 int err; 882 883 start_id = gve_xdp_tx_start_queue_id(priv); 884 err = gve_adminq_destroy_tx_queues(priv, 885 start_id, 886 priv->num_xdp_queues); 887 if (err) { 888 netif_err(priv, drv, priv->dev, 889 "failed to destroy XDP queues\n"); 890 /* This failure will trigger a reset - no need to clean up */ 891 return err; 892 } 893 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n"); 894 895 return 0; 896 } 897 898 static int gve_destroy_rings(struct gve_priv *priv) 899 { 900 int num_tx_queues = gve_num_tx_queues(priv); 901 int err; 902 903 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); 904 if (err) { 905 netif_err(priv, drv, priv->dev, 906 "failed to destroy tx queues\n"); 907 /* This failure will trigger a reset - no need to clean up */ 908 return err; 909 } 910 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); 911 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); 912 if (err) { 913 netif_err(priv, drv, priv->dev, 914 "failed to destroy rx queues\n"); 915 /* This failure will trigger a reset - no need to clean up */ 916 return err; 917 } 918 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); 919 return 0; 920 } 921 922 static void gve_rx_free_rings(struct gve_priv *priv) 923 { 924 if (gve_is_gqi(priv)) 925 gve_rx_free_rings_gqi(priv); 926 else 927 gve_rx_free_rings_dqo(priv); 928 } 929 930 static void gve_free_xdp_rings(struct gve_priv *priv) 931 { 932 int ntfy_idx, start_id; 933 int i; 934 935 start_id = gve_xdp_tx_start_queue_id(priv); 936 if (priv->tx) { 937 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { 938 ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 939 gve_remove_napi(priv, ntfy_idx); 940 } 941 gve_tx_free_rings(priv, start_id, priv->num_xdp_queues); 942 } 943 } 944 945 static void gve_free_rings(struct gve_priv *priv) 946 { 947 int num_tx_queues = gve_num_tx_queues(priv); 948 int ntfy_idx; 949 int i; 950 951 if (priv->tx) { 952 for (i = 0; i < num_tx_queues; i++) { 953 ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 954 gve_remove_napi(priv, ntfy_idx); 955 } 956 gve_tx_free_rings(priv, 0, num_tx_queues); 957 kvfree(priv->tx); 958 priv->tx = NULL; 959 } 960 if (priv->rx) { 961 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 962 ntfy_idx = gve_rx_idx_to_ntfy(priv, i); 963 gve_remove_napi(priv, ntfy_idx); 964 } 965 gve_rx_free_rings(priv); 966 kvfree(priv->rx); 967 priv->rx = NULL; 968 } 969 } 970 971 int gve_alloc_page(struct gve_priv *priv, struct device *dev, 972 struct page **page, dma_addr_t *dma, 973 enum dma_data_direction dir, gfp_t gfp_flags) 974 { 975 *page = alloc_page(gfp_flags); 976 if (!*page) { 977 priv->page_alloc_fail++; 978 return -ENOMEM; 979 } 980 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); 981 if (dma_mapping_error(dev, *dma)) { 982 priv->dma_mapping_error++; 983 put_page(*page); 984 return -ENOMEM; 985 } 986 return 0; 987 } 988 989 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id, 990 int pages) 991 { 992 struct gve_queue_page_list *qpl = &priv->qpls[id]; 993 int err; 994 int i; 995 996 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 997 netif_err(priv, drv, priv->dev, 998 "Reached max number of registered pages %llu > %llu\n", 999 pages + priv->num_registered_pages, 1000 priv->max_registered_pages); 1001 return -EINVAL; 1002 } 1003 1004 qpl->id = id; 1005 qpl->num_entries = 0; 1006 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); 1007 /* caller handles clean up */ 1008 if (!qpl->pages) 1009 return -ENOMEM; 1010 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); 1011 /* caller handles clean up */ 1012 if (!qpl->page_buses) 1013 return -ENOMEM; 1014 1015 for (i = 0; i < pages; i++) { 1016 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], 1017 &qpl->page_buses[i], 1018 gve_qpl_dma_dir(priv, id), GFP_KERNEL); 1019 /* caller handles clean up */ 1020 if (err) 1021 return -ENOMEM; 1022 qpl->num_entries++; 1023 } 1024 priv->num_registered_pages += pages; 1025 1026 return 0; 1027 } 1028 1029 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, 1030 enum dma_data_direction dir) 1031 { 1032 if (!dma_mapping_error(dev, dma)) 1033 dma_unmap_page(dev, dma, PAGE_SIZE, dir); 1034 if (page) 1035 put_page(page); 1036 } 1037 1038 static void gve_free_queue_page_list(struct gve_priv *priv, u32 id) 1039 { 1040 struct gve_queue_page_list *qpl = &priv->qpls[id]; 1041 int i; 1042 1043 if (!qpl->pages) 1044 return; 1045 if (!qpl->page_buses) 1046 goto free_pages; 1047 1048 for (i = 0; i < qpl->num_entries; i++) 1049 gve_free_page(&priv->pdev->dev, qpl->pages[i], 1050 qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); 1051 1052 kvfree(qpl->page_buses); 1053 qpl->page_buses = NULL; 1054 free_pages: 1055 kvfree(qpl->pages); 1056 qpl->pages = NULL; 1057 priv->num_registered_pages -= qpl->num_entries; 1058 } 1059 1060 static int gve_alloc_xdp_qpls(struct gve_priv *priv) 1061 { 1062 int start_id; 1063 int i, j; 1064 int err; 1065 1066 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); 1067 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 1068 err = gve_alloc_queue_page_list(priv, i, 1069 priv->tx_pages_per_qpl); 1070 if (err) 1071 goto free_qpls; 1072 } 1073 1074 return 0; 1075 1076 free_qpls: 1077 for (j = start_id; j <= i; j++) 1078 gve_free_queue_page_list(priv, j); 1079 return err; 1080 } 1081 1082 static int gve_alloc_qpls(struct gve_priv *priv) 1083 { 1084 int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; 1085 int start_id; 1086 int i, j; 1087 int err; 1088 1089 if (priv->queue_format != GVE_GQI_QPL_FORMAT) 1090 return 0; 1091 1092 priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL); 1093 if (!priv->qpls) 1094 return -ENOMEM; 1095 1096 start_id = gve_tx_start_qpl_id(priv); 1097 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { 1098 err = gve_alloc_queue_page_list(priv, i, 1099 priv->tx_pages_per_qpl); 1100 if (err) 1101 goto free_qpls; 1102 } 1103 1104 start_id = gve_rx_start_qpl_id(priv); 1105 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { 1106 err = gve_alloc_queue_page_list(priv, i, 1107 priv->rx_data_slot_cnt); 1108 if (err) 1109 goto free_qpls; 1110 } 1111 1112 priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) * 1113 sizeof(unsigned long) * BITS_PER_BYTE; 1114 priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues), 1115 sizeof(unsigned long), GFP_KERNEL); 1116 if (!priv->qpl_cfg.qpl_id_map) { 1117 err = -ENOMEM; 1118 goto free_qpls; 1119 } 1120 1121 return 0; 1122 1123 free_qpls: 1124 for (j = 0; j <= i; j++) 1125 gve_free_queue_page_list(priv, j); 1126 kvfree(priv->qpls); 1127 priv->qpls = NULL; 1128 return err; 1129 } 1130 1131 static void gve_free_xdp_qpls(struct gve_priv *priv) 1132 { 1133 int start_id; 1134 int i; 1135 1136 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); 1137 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) 1138 gve_free_queue_page_list(priv, i); 1139 } 1140 1141 static void gve_free_qpls(struct gve_priv *priv) 1142 { 1143 int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; 1144 int i; 1145 1146 if (!priv->qpls) 1147 return; 1148 1149 kvfree(priv->qpl_cfg.qpl_id_map); 1150 priv->qpl_cfg.qpl_id_map = NULL; 1151 1152 for (i = 0; i < max_queues; i++) 1153 gve_free_queue_page_list(priv, i); 1154 1155 kvfree(priv->qpls); 1156 priv->qpls = NULL; 1157 } 1158 1159 /* Use this to schedule a reset when the device is capable of continuing 1160 * to handle other requests in its current state. If it is not, do a reset 1161 * in thread instead. 1162 */ 1163 void gve_schedule_reset(struct gve_priv *priv) 1164 { 1165 gve_set_do_reset(priv); 1166 queue_work(priv->gve_wq, &priv->service_task); 1167 } 1168 1169 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); 1170 static int gve_reset_recovery(struct gve_priv *priv, bool was_up); 1171 static void gve_turndown(struct gve_priv *priv); 1172 static void gve_turnup(struct gve_priv *priv); 1173 1174 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) 1175 { 1176 struct napi_struct *napi; 1177 struct gve_rx_ring *rx; 1178 int err = 0; 1179 int i, j; 1180 u32 tx_qid; 1181 1182 if (!priv->num_xdp_queues) 1183 return 0; 1184 1185 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1186 rx = &priv->rx[i]; 1187 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1188 1189 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i, 1190 napi->napi_id); 1191 if (err) 1192 goto err; 1193 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1194 MEM_TYPE_PAGE_SHARED, NULL); 1195 if (err) 1196 goto err; 1197 rx->xsk_pool = xsk_get_pool_from_qid(dev, i); 1198 if (rx->xsk_pool) { 1199 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, 1200 napi->napi_id); 1201 if (err) 1202 goto err; 1203 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1204 MEM_TYPE_XSK_BUFF_POOL, NULL); 1205 if (err) 1206 goto err; 1207 xsk_pool_set_rxq_info(rx->xsk_pool, 1208 &rx->xsk_rxq); 1209 } 1210 } 1211 1212 for (i = 0; i < priv->num_xdp_queues; i++) { 1213 tx_qid = gve_xdp_tx_queue_id(priv, i); 1214 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); 1215 } 1216 return 0; 1217 1218 err: 1219 for (j = i; j >= 0; j--) { 1220 rx = &priv->rx[j]; 1221 if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1222 xdp_rxq_info_unreg(&rx->xdp_rxq); 1223 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1224 xdp_rxq_info_unreg(&rx->xsk_rxq); 1225 } 1226 return err; 1227 } 1228 1229 static void gve_unreg_xdp_info(struct gve_priv *priv) 1230 { 1231 int i, tx_qid; 1232 1233 if (!priv->num_xdp_queues) 1234 return; 1235 1236 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1237 struct gve_rx_ring *rx = &priv->rx[i]; 1238 1239 xdp_rxq_info_unreg(&rx->xdp_rxq); 1240 if (rx->xsk_pool) { 1241 xdp_rxq_info_unreg(&rx->xsk_rxq); 1242 rx->xsk_pool = NULL; 1243 } 1244 } 1245 1246 for (i = 0; i < priv->num_xdp_queues; i++) { 1247 tx_qid = gve_xdp_tx_queue_id(priv, i); 1248 priv->tx[tx_qid].xsk_pool = NULL; 1249 } 1250 } 1251 1252 static void gve_drain_page_cache(struct gve_priv *priv) 1253 { 1254 struct page_frag_cache *nc; 1255 int i; 1256 1257 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1258 nc = &priv->rx[i].page_cache; 1259 if (nc->va) { 1260 __page_frag_cache_drain(virt_to_page(nc->va), 1261 nc->pagecnt_bias); 1262 nc->va = NULL; 1263 } 1264 } 1265 } 1266 1267 static int gve_open(struct net_device *dev) 1268 { 1269 struct gve_priv *priv = netdev_priv(dev); 1270 int err; 1271 1272 if (priv->xdp_prog) 1273 priv->num_xdp_queues = priv->rx_cfg.num_queues; 1274 else 1275 priv->num_xdp_queues = 0; 1276 1277 err = gve_alloc_qpls(priv); 1278 if (err) 1279 return err; 1280 1281 err = gve_alloc_rings(priv); 1282 if (err) 1283 goto free_qpls; 1284 1285 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 1286 if (err) 1287 goto free_rings; 1288 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 1289 if (err) 1290 goto free_rings; 1291 1292 err = gve_reg_xdp_info(priv, dev); 1293 if (err) 1294 goto free_rings; 1295 1296 err = gve_register_qpls(priv); 1297 if (err) 1298 goto reset; 1299 1300 if (!gve_is_gqi(priv)) { 1301 /* Hard code this for now. This may be tuned in the future for 1302 * performance. 1303 */ 1304 priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO; 1305 } 1306 err = gve_create_rings(priv); 1307 if (err) 1308 goto reset; 1309 1310 gve_set_device_rings_ok(priv); 1311 1312 if (gve_get_report_stats(priv)) 1313 mod_timer(&priv->stats_report_timer, 1314 round_jiffies(jiffies + 1315 msecs_to_jiffies(priv->stats_report_timer_period))); 1316 1317 gve_turnup(priv); 1318 queue_work(priv->gve_wq, &priv->service_task); 1319 priv->interface_up_cnt++; 1320 return 0; 1321 1322 free_rings: 1323 gve_free_rings(priv); 1324 free_qpls: 1325 gve_free_qpls(priv); 1326 return err; 1327 1328 reset: 1329 /* This must have been called from a reset due to the rtnl lock 1330 * so just return at this point. 1331 */ 1332 if (gve_get_reset_in_progress(priv)) 1333 return err; 1334 /* Otherwise reset before returning */ 1335 gve_reset_and_teardown(priv, true); 1336 /* if this fails there is nothing we can do so just ignore the return */ 1337 gve_reset_recovery(priv, false); 1338 /* return the original error */ 1339 return err; 1340 } 1341 1342 static int gve_close(struct net_device *dev) 1343 { 1344 struct gve_priv *priv = netdev_priv(dev); 1345 int err; 1346 1347 netif_carrier_off(dev); 1348 if (gve_get_device_rings_ok(priv)) { 1349 gve_turndown(priv); 1350 gve_drain_page_cache(priv); 1351 err = gve_destroy_rings(priv); 1352 if (err) 1353 goto err; 1354 err = gve_unregister_qpls(priv); 1355 if (err) 1356 goto err; 1357 gve_clear_device_rings_ok(priv); 1358 } 1359 del_timer_sync(&priv->stats_report_timer); 1360 1361 gve_unreg_xdp_info(priv); 1362 gve_free_rings(priv); 1363 gve_free_qpls(priv); 1364 priv->interface_down_cnt++; 1365 return 0; 1366 1367 err: 1368 /* This must have been called from a reset due to the rtnl lock 1369 * so just return at this point. 1370 */ 1371 if (gve_get_reset_in_progress(priv)) 1372 return err; 1373 /* Otherwise reset before returning */ 1374 gve_reset_and_teardown(priv, true); 1375 return gve_reset_recovery(priv, false); 1376 } 1377 1378 static int gve_remove_xdp_queues(struct gve_priv *priv) 1379 { 1380 int err; 1381 1382 err = gve_destroy_xdp_rings(priv); 1383 if (err) 1384 return err; 1385 1386 err = gve_unregister_xdp_qpls(priv); 1387 if (err) 1388 return err; 1389 1390 gve_unreg_xdp_info(priv); 1391 gve_free_xdp_rings(priv); 1392 gve_free_xdp_qpls(priv); 1393 priv->num_xdp_queues = 0; 1394 return 0; 1395 } 1396 1397 static int gve_add_xdp_queues(struct gve_priv *priv) 1398 { 1399 int err; 1400 1401 priv->num_xdp_queues = priv->tx_cfg.num_queues; 1402 1403 err = gve_alloc_xdp_qpls(priv); 1404 if (err) 1405 goto err; 1406 1407 err = gve_alloc_xdp_rings(priv); 1408 if (err) 1409 goto free_xdp_qpls; 1410 1411 err = gve_reg_xdp_info(priv, priv->dev); 1412 if (err) 1413 goto free_xdp_rings; 1414 1415 err = gve_register_xdp_qpls(priv); 1416 if (err) 1417 goto free_xdp_rings; 1418 1419 err = gve_create_xdp_rings(priv); 1420 if (err) 1421 goto free_xdp_rings; 1422 1423 return 0; 1424 1425 free_xdp_rings: 1426 gve_free_xdp_rings(priv); 1427 free_xdp_qpls: 1428 gve_free_xdp_qpls(priv); 1429 err: 1430 priv->num_xdp_queues = 0; 1431 return err; 1432 } 1433 1434 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1435 { 1436 if (!gve_get_napi_enabled(priv)) 1437 return; 1438 1439 if (link_status == netif_carrier_ok(priv->dev)) 1440 return; 1441 1442 if (link_status) { 1443 netdev_info(priv->dev, "Device link is up.\n"); 1444 netif_carrier_on(priv->dev); 1445 } else { 1446 netdev_info(priv->dev, "Device link is down.\n"); 1447 netif_carrier_off(priv->dev); 1448 } 1449 } 1450 1451 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1452 struct netlink_ext_ack *extack) 1453 { 1454 struct bpf_prog *old_prog; 1455 int err = 0; 1456 u32 status; 1457 1458 old_prog = READ_ONCE(priv->xdp_prog); 1459 if (!netif_carrier_ok(priv->dev)) { 1460 WRITE_ONCE(priv->xdp_prog, prog); 1461 if (old_prog) 1462 bpf_prog_put(old_prog); 1463 return 0; 1464 } 1465 1466 gve_turndown(priv); 1467 if (!old_prog && prog) { 1468 // Allocate XDP TX queues if an XDP program is 1469 // being installed 1470 err = gve_add_xdp_queues(priv); 1471 if (err) 1472 goto out; 1473 } else if (old_prog && !prog) { 1474 // Remove XDP TX queues if an XDP program is 1475 // being uninstalled 1476 err = gve_remove_xdp_queues(priv); 1477 if (err) 1478 goto out; 1479 } 1480 WRITE_ONCE(priv->xdp_prog, prog); 1481 if (old_prog) 1482 bpf_prog_put(old_prog); 1483 1484 out: 1485 gve_turnup(priv); 1486 status = ioread32be(&priv->reg_bar0->device_status); 1487 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1488 return err; 1489 } 1490 1491 static int gve_xsk_pool_enable(struct net_device *dev, 1492 struct xsk_buff_pool *pool, 1493 u16 qid) 1494 { 1495 struct gve_priv *priv = netdev_priv(dev); 1496 struct napi_struct *napi; 1497 struct gve_rx_ring *rx; 1498 int tx_qid; 1499 int err; 1500 1501 if (qid >= priv->rx_cfg.num_queues) { 1502 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1503 return -EINVAL; 1504 } 1505 if (xsk_pool_get_rx_frame_size(pool) < 1506 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1507 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1508 return -EINVAL; 1509 } 1510 1511 err = xsk_pool_dma_map(pool, &priv->pdev->dev, 1512 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1513 if (err) 1514 return err; 1515 1516 /* If XDP prog is not installed, return */ 1517 if (!priv->xdp_prog) 1518 return 0; 1519 1520 rx = &priv->rx[qid]; 1521 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1522 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); 1523 if (err) 1524 goto err; 1525 1526 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1527 MEM_TYPE_XSK_BUFF_POOL, NULL); 1528 if (err) 1529 goto err; 1530 1531 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); 1532 rx->xsk_pool = pool; 1533 1534 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1535 priv->tx[tx_qid].xsk_pool = pool; 1536 1537 return 0; 1538 err: 1539 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1540 xdp_rxq_info_unreg(&rx->xsk_rxq); 1541 1542 xsk_pool_dma_unmap(pool, 1543 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1544 return err; 1545 } 1546 1547 static int gve_xsk_pool_disable(struct net_device *dev, 1548 u16 qid) 1549 { 1550 struct gve_priv *priv = netdev_priv(dev); 1551 struct napi_struct *napi_rx; 1552 struct napi_struct *napi_tx; 1553 struct xsk_buff_pool *pool; 1554 int tx_qid; 1555 1556 pool = xsk_get_pool_from_qid(dev, qid); 1557 if (!pool) 1558 return -EINVAL; 1559 if (qid >= priv->rx_cfg.num_queues) 1560 return -EINVAL; 1561 1562 /* If XDP prog is not installed, unmap DMA and return */ 1563 if (!priv->xdp_prog) 1564 goto done; 1565 1566 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1567 if (!netif_running(dev)) { 1568 priv->rx[qid].xsk_pool = NULL; 1569 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1570 priv->tx[tx_qid].xsk_pool = NULL; 1571 goto done; 1572 } 1573 1574 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1575 napi_disable(napi_rx); /* make sure current rx poll is done */ 1576 1577 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1578 napi_disable(napi_tx); /* make sure current tx poll is done */ 1579 1580 priv->rx[qid].xsk_pool = NULL; 1581 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1582 priv->tx[tx_qid].xsk_pool = NULL; 1583 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1584 1585 napi_enable(napi_rx); 1586 if (gve_rx_work_pending(&priv->rx[qid])) 1587 napi_schedule(napi_rx); 1588 1589 napi_enable(napi_tx); 1590 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1591 napi_schedule(napi_tx); 1592 1593 done: 1594 xsk_pool_dma_unmap(pool, 1595 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1596 return 0; 1597 } 1598 1599 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1600 { 1601 struct gve_priv *priv = netdev_priv(dev); 1602 int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id); 1603 1604 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1605 return -EINVAL; 1606 1607 if (flags & XDP_WAKEUP_TX) { 1608 struct gve_tx_ring *tx = &priv->tx[tx_queue_id]; 1609 struct napi_struct *napi = 1610 &priv->ntfy_blocks[tx->ntfy_id].napi; 1611 1612 if (!napi_if_scheduled_mark_missed(napi)) { 1613 /* Call local_bh_enable to trigger SoftIRQ processing */ 1614 local_bh_disable(); 1615 napi_schedule(napi); 1616 local_bh_enable(); 1617 } 1618 1619 tx->xdp_xsk_wakeup++; 1620 } 1621 1622 return 0; 1623 } 1624 1625 static int verify_xdp_configuration(struct net_device *dev) 1626 { 1627 struct gve_priv *priv = netdev_priv(dev); 1628 1629 if (dev->features & NETIF_F_LRO) { 1630 netdev_warn(dev, "XDP is not supported when LRO is on.\n"); 1631 return -EOPNOTSUPP; 1632 } 1633 1634 if (priv->queue_format != GVE_GQI_QPL_FORMAT) { 1635 netdev_warn(dev, "XDP is not supported in mode %d.\n", 1636 priv->queue_format); 1637 return -EOPNOTSUPP; 1638 } 1639 1640 if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) { 1641 netdev_warn(dev, "XDP is not supported for mtu %d.\n", 1642 dev->mtu); 1643 return -EOPNOTSUPP; 1644 } 1645 1646 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || 1647 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { 1648 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d", 1649 priv->rx_cfg.num_queues, 1650 priv->tx_cfg.num_queues, 1651 priv->tx_cfg.max_queues); 1652 return -EINVAL; 1653 } 1654 return 0; 1655 } 1656 1657 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1658 { 1659 struct gve_priv *priv = netdev_priv(dev); 1660 int err; 1661 1662 err = verify_xdp_configuration(dev); 1663 if (err) 1664 return err; 1665 switch (xdp->command) { 1666 case XDP_SETUP_PROG: 1667 return gve_set_xdp(priv, xdp->prog, xdp->extack); 1668 case XDP_SETUP_XSK_POOL: 1669 if (xdp->xsk.pool) 1670 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); 1671 else 1672 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id); 1673 default: 1674 return -EINVAL; 1675 } 1676 } 1677 1678 int gve_adjust_queues(struct gve_priv *priv, 1679 struct gve_queue_config new_rx_config, 1680 struct gve_queue_config new_tx_config) 1681 { 1682 int err; 1683 1684 if (netif_carrier_ok(priv->dev)) { 1685 /* To make this process as simple as possible we teardown the 1686 * device, set the new configuration, and then bring the device 1687 * up again. 1688 */ 1689 err = gve_close(priv->dev); 1690 /* we have already tried to reset in close, 1691 * just fail at this point 1692 */ 1693 if (err) 1694 return err; 1695 priv->tx_cfg = new_tx_config; 1696 priv->rx_cfg = new_rx_config; 1697 1698 err = gve_open(priv->dev); 1699 if (err) 1700 goto err; 1701 1702 return 0; 1703 } 1704 /* Set the config for the next up. */ 1705 priv->tx_cfg = new_tx_config; 1706 priv->rx_cfg = new_rx_config; 1707 1708 return 0; 1709 err: 1710 netif_err(priv, drv, priv->dev, 1711 "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n"); 1712 gve_turndown(priv); 1713 return err; 1714 } 1715 1716 static void gve_turndown(struct gve_priv *priv) 1717 { 1718 int idx; 1719 1720 if (netif_carrier_ok(priv->dev)) 1721 netif_carrier_off(priv->dev); 1722 1723 if (!gve_get_napi_enabled(priv)) 1724 return; 1725 1726 /* Disable napi to prevent more work from coming in */ 1727 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1728 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1729 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1730 1731 napi_disable(&block->napi); 1732 } 1733 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1734 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1735 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1736 1737 napi_disable(&block->napi); 1738 } 1739 1740 /* Stop tx queues */ 1741 netif_tx_disable(priv->dev); 1742 1743 gve_clear_napi_enabled(priv); 1744 gve_clear_report_stats(priv); 1745 } 1746 1747 static void gve_turnup(struct gve_priv *priv) 1748 { 1749 int idx; 1750 1751 /* Start the tx queues */ 1752 netif_tx_start_all_queues(priv->dev); 1753 1754 /* Enable napi and unmask interrupts for all queues */ 1755 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1756 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1757 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1758 1759 napi_enable(&block->napi); 1760 if (gve_is_gqi(priv)) { 1761 iowrite32be(0, gve_irq_doorbell(priv, block)); 1762 } else { 1763 gve_set_itr_coalesce_usecs_dqo(priv, block, 1764 priv->tx_coalesce_usecs); 1765 } 1766 } 1767 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1768 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1769 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1770 1771 napi_enable(&block->napi); 1772 if (gve_is_gqi(priv)) { 1773 iowrite32be(0, gve_irq_doorbell(priv, block)); 1774 } else { 1775 gve_set_itr_coalesce_usecs_dqo(priv, block, 1776 priv->rx_coalesce_usecs); 1777 } 1778 } 1779 1780 gve_set_napi_enabled(priv); 1781 } 1782 1783 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 1784 { 1785 struct gve_notify_block *block; 1786 struct gve_tx_ring *tx = NULL; 1787 struct gve_priv *priv; 1788 u32 last_nic_done; 1789 u32 current_time; 1790 u32 ntfy_idx; 1791 1792 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 1793 priv = netdev_priv(dev); 1794 if (txqueue > priv->tx_cfg.num_queues) 1795 goto reset; 1796 1797 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 1798 if (ntfy_idx >= priv->num_ntfy_blks) 1799 goto reset; 1800 1801 block = &priv->ntfy_blocks[ntfy_idx]; 1802 tx = block->tx; 1803 1804 current_time = jiffies_to_msecs(jiffies); 1805 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 1806 goto reset; 1807 1808 /* Check to see if there are missed completions, which will allow us to 1809 * kick the queue. 1810 */ 1811 last_nic_done = gve_tx_load_event_counter(priv, tx); 1812 if (last_nic_done - tx->done) { 1813 netdev_info(dev, "Kicking queue %d", txqueue); 1814 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 1815 napi_schedule(&block->napi); 1816 tx->last_kick_msec = current_time; 1817 goto out; 1818 } // Else reset. 1819 1820 reset: 1821 gve_schedule_reset(priv); 1822 1823 out: 1824 if (tx) 1825 tx->queue_timeout++; 1826 priv->tx_timeo_cnt++; 1827 } 1828 1829 static int gve_set_features(struct net_device *netdev, 1830 netdev_features_t features) 1831 { 1832 const netdev_features_t orig_features = netdev->features; 1833 struct gve_priv *priv = netdev_priv(netdev); 1834 int err; 1835 1836 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { 1837 netdev->features ^= NETIF_F_LRO; 1838 if (netif_carrier_ok(netdev)) { 1839 /* To make this process as simple as possible we 1840 * teardown the device, set the new configuration, 1841 * and then bring the device up again. 1842 */ 1843 err = gve_close(netdev); 1844 /* We have already tried to reset in close, just fail 1845 * at this point. 1846 */ 1847 if (err) 1848 goto err; 1849 1850 err = gve_open(netdev); 1851 if (err) 1852 goto err; 1853 } 1854 } 1855 1856 return 0; 1857 err: 1858 /* Reverts the change on error. */ 1859 netdev->features = orig_features; 1860 netif_err(priv, drv, netdev, 1861 "Set features failed! !!! DISABLING ALL QUEUES !!!\n"); 1862 return err; 1863 } 1864 1865 static const struct net_device_ops gve_netdev_ops = { 1866 .ndo_start_xmit = gve_start_xmit, 1867 .ndo_open = gve_open, 1868 .ndo_stop = gve_close, 1869 .ndo_get_stats64 = gve_get_stats, 1870 .ndo_tx_timeout = gve_tx_timeout, 1871 .ndo_set_features = gve_set_features, 1872 .ndo_bpf = gve_xdp, 1873 .ndo_xdp_xmit = gve_xdp_xmit, 1874 .ndo_xsk_wakeup = gve_xsk_wakeup, 1875 }; 1876 1877 static void gve_handle_status(struct gve_priv *priv, u32 status) 1878 { 1879 if (GVE_DEVICE_STATUS_RESET_MASK & status) { 1880 dev_info(&priv->pdev->dev, "Device requested reset.\n"); 1881 gve_set_do_reset(priv); 1882 } 1883 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) { 1884 priv->stats_report_trigger_cnt++; 1885 gve_set_do_report_stats(priv); 1886 } 1887 } 1888 1889 static void gve_handle_reset(struct gve_priv *priv) 1890 { 1891 /* A service task will be scheduled at the end of probe to catch any 1892 * resets that need to happen, and we don't want to reset until 1893 * probe is done. 1894 */ 1895 if (gve_get_probe_in_progress(priv)) 1896 return; 1897 1898 if (gve_get_do_reset(priv)) { 1899 rtnl_lock(); 1900 gve_reset(priv, false); 1901 rtnl_unlock(); 1902 } 1903 } 1904 1905 void gve_handle_report_stats(struct gve_priv *priv) 1906 { 1907 struct stats *stats = priv->stats_report->stats; 1908 int idx, stats_idx = 0; 1909 unsigned int start = 0; 1910 u64 tx_bytes; 1911 1912 if (!gve_get_report_stats(priv)) 1913 return; 1914 1915 be64_add_cpu(&priv->stats_report->written_count, 1); 1916 /* tx stats */ 1917 if (priv->tx) { 1918 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1919 u32 last_completion = 0; 1920 u32 tx_frames = 0; 1921 1922 /* DQO doesn't currently support these metrics. */ 1923 if (gve_is_gqi(priv)) { 1924 last_completion = priv->tx[idx].done; 1925 tx_frames = priv->tx[idx].req; 1926 } 1927 1928 do { 1929 start = u64_stats_fetch_begin(&priv->tx[idx].statss); 1930 tx_bytes = priv->tx[idx].bytes_done; 1931 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); 1932 stats[stats_idx++] = (struct stats) { 1933 .stat_name = cpu_to_be32(TX_WAKE_CNT), 1934 .value = cpu_to_be64(priv->tx[idx].wake_queue), 1935 .queue_id = cpu_to_be32(idx), 1936 }; 1937 stats[stats_idx++] = (struct stats) { 1938 .stat_name = cpu_to_be32(TX_STOP_CNT), 1939 .value = cpu_to_be64(priv->tx[idx].stop_queue), 1940 .queue_id = cpu_to_be32(idx), 1941 }; 1942 stats[stats_idx++] = (struct stats) { 1943 .stat_name = cpu_to_be32(TX_FRAMES_SENT), 1944 .value = cpu_to_be64(tx_frames), 1945 .queue_id = cpu_to_be32(idx), 1946 }; 1947 stats[stats_idx++] = (struct stats) { 1948 .stat_name = cpu_to_be32(TX_BYTES_SENT), 1949 .value = cpu_to_be64(tx_bytes), 1950 .queue_id = cpu_to_be32(idx), 1951 }; 1952 stats[stats_idx++] = (struct stats) { 1953 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED), 1954 .value = cpu_to_be64(last_completion), 1955 .queue_id = cpu_to_be32(idx), 1956 }; 1957 stats[stats_idx++] = (struct stats) { 1958 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT), 1959 .value = cpu_to_be64(priv->tx[idx].queue_timeout), 1960 .queue_id = cpu_to_be32(idx), 1961 }; 1962 } 1963 } 1964 /* rx stats */ 1965 if (priv->rx) { 1966 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1967 stats[stats_idx++] = (struct stats) { 1968 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE), 1969 .value = cpu_to_be64(priv->rx[idx].desc.seqno), 1970 .queue_id = cpu_to_be32(idx), 1971 }; 1972 stats[stats_idx++] = (struct stats) { 1973 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), 1974 .value = cpu_to_be64(priv->rx[0].fill_cnt), 1975 .queue_id = cpu_to_be32(idx), 1976 }; 1977 } 1978 } 1979 } 1980 1981 /* Handle NIC status register changes, reset requests and report stats */ 1982 static void gve_service_task(struct work_struct *work) 1983 { 1984 struct gve_priv *priv = container_of(work, struct gve_priv, 1985 service_task); 1986 u32 status = ioread32be(&priv->reg_bar0->device_status); 1987 1988 gve_handle_status(priv, status); 1989 1990 gve_handle_reset(priv); 1991 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1992 } 1993 1994 static void gve_set_netdev_xdp_features(struct gve_priv *priv) 1995 { 1996 if (priv->queue_format == GVE_GQI_QPL_FORMAT) { 1997 priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC; 1998 priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT; 1999 priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; 2000 priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; 2001 } else { 2002 priv->dev->xdp_features = 0; 2003 } 2004 } 2005 2006 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) 2007 { 2008 int num_ntfy; 2009 int err; 2010 2011 /* Set up the adminq */ 2012 err = gve_adminq_alloc(&priv->pdev->dev, priv); 2013 if (err) { 2014 dev_err(&priv->pdev->dev, 2015 "Failed to alloc admin queue: err=%d\n", err); 2016 return err; 2017 } 2018 2019 err = gve_verify_driver_compatibility(priv); 2020 if (err) { 2021 dev_err(&priv->pdev->dev, 2022 "Could not verify driver compatibility: err=%d\n", err); 2023 goto err; 2024 } 2025 2026 if (skip_describe_device) 2027 goto setup_device; 2028 2029 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED; 2030 /* Get the initial information we need from the device */ 2031 err = gve_adminq_describe_device(priv); 2032 if (err) { 2033 dev_err(&priv->pdev->dev, 2034 "Could not get device information: err=%d\n", err); 2035 goto err; 2036 } 2037 priv->dev->mtu = priv->dev->max_mtu; 2038 num_ntfy = pci_msix_vec_count(priv->pdev); 2039 if (num_ntfy <= 0) { 2040 dev_err(&priv->pdev->dev, 2041 "could not count MSI-x vectors: err=%d\n", num_ntfy); 2042 err = num_ntfy; 2043 goto err; 2044 } else if (num_ntfy < GVE_MIN_MSIX) { 2045 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n", 2046 GVE_MIN_MSIX, num_ntfy); 2047 err = -EINVAL; 2048 goto err; 2049 } 2050 2051 /* Big TCP is only supported on DQ*/ 2052 if (!gve_is_gqi(priv)) 2053 netif_set_tso_max_size(priv->dev, DQO_TX_MAX); 2054 2055 priv->num_registered_pages = 0; 2056 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; 2057 /* gvnic has one Notification Block per MSI-x vector, except for the 2058 * management vector 2059 */ 2060 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; 2061 priv->mgmt_msix_idx = priv->num_ntfy_blks; 2062 2063 priv->tx_cfg.max_queues = 2064 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2); 2065 priv->rx_cfg.max_queues = 2066 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2); 2067 2068 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 2069 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 2070 if (priv->default_num_queues > 0) { 2071 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues, 2072 priv->tx_cfg.num_queues); 2073 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, 2074 priv->rx_cfg.num_queues); 2075 } 2076 2077 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n", 2078 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); 2079 dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n", 2080 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); 2081 2082 if (!gve_is_gqi(priv)) { 2083 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO; 2084 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO; 2085 } 2086 2087 setup_device: 2088 gve_set_netdev_xdp_features(priv); 2089 err = gve_setup_device_resources(priv); 2090 if (!err) 2091 return 0; 2092 err: 2093 gve_adminq_free(&priv->pdev->dev, priv); 2094 return err; 2095 } 2096 2097 static void gve_teardown_priv_resources(struct gve_priv *priv) 2098 { 2099 gve_teardown_device_resources(priv); 2100 gve_adminq_free(&priv->pdev->dev, priv); 2101 } 2102 2103 static void gve_trigger_reset(struct gve_priv *priv) 2104 { 2105 /* Reset the device by releasing the AQ */ 2106 gve_adminq_release(priv); 2107 } 2108 2109 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up) 2110 { 2111 gve_trigger_reset(priv); 2112 /* With the reset having already happened, close cannot fail */ 2113 if (was_up) 2114 gve_close(priv->dev); 2115 gve_teardown_priv_resources(priv); 2116 } 2117 2118 static int gve_reset_recovery(struct gve_priv *priv, bool was_up) 2119 { 2120 int err; 2121 2122 err = gve_init_priv(priv, true); 2123 if (err) 2124 goto err; 2125 if (was_up) { 2126 err = gve_open(priv->dev); 2127 if (err) 2128 goto err; 2129 } 2130 return 0; 2131 err: 2132 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n"); 2133 gve_turndown(priv); 2134 return err; 2135 } 2136 2137 int gve_reset(struct gve_priv *priv, bool attempt_teardown) 2138 { 2139 bool was_up = netif_carrier_ok(priv->dev); 2140 int err; 2141 2142 dev_info(&priv->pdev->dev, "Performing reset\n"); 2143 gve_clear_do_reset(priv); 2144 gve_set_reset_in_progress(priv); 2145 /* If we aren't attempting to teardown normally, just go turndown and 2146 * reset right away. 2147 */ 2148 if (!attempt_teardown) { 2149 gve_turndown(priv); 2150 gve_reset_and_teardown(priv, was_up); 2151 } else { 2152 /* Otherwise attempt to close normally */ 2153 if (was_up) { 2154 err = gve_close(priv->dev); 2155 /* If that fails reset as we did above */ 2156 if (err) 2157 gve_reset_and_teardown(priv, was_up); 2158 } 2159 /* Clean up any remaining resources */ 2160 gve_teardown_priv_resources(priv); 2161 } 2162 2163 /* Set it all back up */ 2164 err = gve_reset_recovery(priv, was_up); 2165 gve_clear_reset_in_progress(priv); 2166 priv->reset_cnt++; 2167 priv->interface_up_cnt = 0; 2168 priv->interface_down_cnt = 0; 2169 priv->stats_report_trigger_cnt = 0; 2170 return err; 2171 } 2172 2173 static void gve_write_version(u8 __iomem *driver_version_register) 2174 { 2175 const char *c = gve_version_prefix; 2176 2177 while (*c) { 2178 writeb(*c, driver_version_register); 2179 c++; 2180 } 2181 2182 c = gve_version_str; 2183 while (*c) { 2184 writeb(*c, driver_version_register); 2185 c++; 2186 } 2187 writeb('\n', driver_version_register); 2188 } 2189 2190 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 2191 { 2192 int max_tx_queues, max_rx_queues; 2193 struct net_device *dev; 2194 __be32 __iomem *db_bar; 2195 struct gve_registers __iomem *reg_bar; 2196 struct gve_priv *priv; 2197 int err; 2198 2199 err = pci_enable_device(pdev); 2200 if (err) 2201 return err; 2202 2203 err = pci_request_regions(pdev, "gvnic-cfg"); 2204 if (err) 2205 goto abort_with_enabled; 2206 2207 pci_set_master(pdev); 2208 2209 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 2210 if (err) { 2211 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err); 2212 goto abort_with_pci_region; 2213 } 2214 2215 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0); 2216 if (!reg_bar) { 2217 dev_err(&pdev->dev, "Failed to map pci bar!\n"); 2218 err = -ENOMEM; 2219 goto abort_with_pci_region; 2220 } 2221 2222 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0); 2223 if (!db_bar) { 2224 dev_err(&pdev->dev, "Failed to map doorbell bar!\n"); 2225 err = -ENOMEM; 2226 goto abort_with_reg_bar; 2227 } 2228 2229 gve_write_version(®_bar->driver_version); 2230 /* Get max queues to alloc etherdev */ 2231 max_tx_queues = ioread32be(®_bar->max_tx_queues); 2232 max_rx_queues = ioread32be(®_bar->max_rx_queues); 2233 /* Alloc and setup the netdev and priv */ 2234 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues); 2235 if (!dev) { 2236 dev_err(&pdev->dev, "could not allocate netdev\n"); 2237 err = -ENOMEM; 2238 goto abort_with_db_bar; 2239 } 2240 SET_NETDEV_DEV(dev, &pdev->dev); 2241 pci_set_drvdata(pdev, dev); 2242 dev->ethtool_ops = &gve_ethtool_ops; 2243 dev->netdev_ops = &gve_netdev_ops; 2244 2245 /* Set default and supported features. 2246 * 2247 * Features might be set in other locations as well (such as 2248 * `gve_adminq_describe_device`). 2249 */ 2250 dev->hw_features = NETIF_F_HIGHDMA; 2251 dev->hw_features |= NETIF_F_SG; 2252 dev->hw_features |= NETIF_F_HW_CSUM; 2253 dev->hw_features |= NETIF_F_TSO; 2254 dev->hw_features |= NETIF_F_TSO6; 2255 dev->hw_features |= NETIF_F_TSO_ECN; 2256 dev->hw_features |= NETIF_F_RXCSUM; 2257 dev->hw_features |= NETIF_F_RXHASH; 2258 dev->features = dev->hw_features; 2259 dev->watchdog_timeo = 5 * HZ; 2260 dev->min_mtu = ETH_MIN_MTU; 2261 netif_carrier_off(dev); 2262 2263 priv = netdev_priv(dev); 2264 priv->dev = dev; 2265 priv->pdev = pdev; 2266 priv->msg_enable = DEFAULT_MSG_LEVEL; 2267 priv->reg_bar0 = reg_bar; 2268 priv->db_bar2 = db_bar; 2269 priv->service_task_flags = 0x0; 2270 priv->state_flags = 0x0; 2271 priv->ethtool_flags = 0x0; 2272 2273 gve_set_probe_in_progress(priv); 2274 priv->gve_wq = alloc_ordered_workqueue("gve", 0); 2275 if (!priv->gve_wq) { 2276 dev_err(&pdev->dev, "Could not allocate workqueue"); 2277 err = -ENOMEM; 2278 goto abort_with_netdev; 2279 } 2280 INIT_WORK(&priv->service_task, gve_service_task); 2281 INIT_WORK(&priv->stats_report_task, gve_stats_report_task); 2282 priv->tx_cfg.max_queues = max_tx_queues; 2283 priv->rx_cfg.max_queues = max_rx_queues; 2284 2285 err = gve_init_priv(priv, false); 2286 if (err) 2287 goto abort_with_wq; 2288 2289 err = register_netdev(dev); 2290 if (err) 2291 goto abort_with_gve_init; 2292 2293 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); 2294 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); 2295 gve_clear_probe_in_progress(priv); 2296 queue_work(priv->gve_wq, &priv->service_task); 2297 return 0; 2298 2299 abort_with_gve_init: 2300 gve_teardown_priv_resources(priv); 2301 2302 abort_with_wq: 2303 destroy_workqueue(priv->gve_wq); 2304 2305 abort_with_netdev: 2306 free_netdev(dev); 2307 2308 abort_with_db_bar: 2309 pci_iounmap(pdev, db_bar); 2310 2311 abort_with_reg_bar: 2312 pci_iounmap(pdev, reg_bar); 2313 2314 abort_with_pci_region: 2315 pci_release_regions(pdev); 2316 2317 abort_with_enabled: 2318 pci_disable_device(pdev); 2319 return err; 2320 } 2321 2322 static void gve_remove(struct pci_dev *pdev) 2323 { 2324 struct net_device *netdev = pci_get_drvdata(pdev); 2325 struct gve_priv *priv = netdev_priv(netdev); 2326 __be32 __iomem *db_bar = priv->db_bar2; 2327 void __iomem *reg_bar = priv->reg_bar0; 2328 2329 unregister_netdev(netdev); 2330 gve_teardown_priv_resources(priv); 2331 destroy_workqueue(priv->gve_wq); 2332 free_netdev(netdev); 2333 pci_iounmap(pdev, db_bar); 2334 pci_iounmap(pdev, reg_bar); 2335 pci_release_regions(pdev); 2336 pci_disable_device(pdev); 2337 } 2338 2339 static void gve_shutdown(struct pci_dev *pdev) 2340 { 2341 struct net_device *netdev = pci_get_drvdata(pdev); 2342 struct gve_priv *priv = netdev_priv(netdev); 2343 bool was_up = netif_carrier_ok(priv->dev); 2344 2345 rtnl_lock(); 2346 if (was_up && gve_close(priv->dev)) { 2347 /* If the dev was up, attempt to close, if close fails, reset */ 2348 gve_reset_and_teardown(priv, was_up); 2349 } else { 2350 /* If the dev wasn't up or close worked, finish tearing down */ 2351 gve_teardown_priv_resources(priv); 2352 } 2353 rtnl_unlock(); 2354 } 2355 2356 #ifdef CONFIG_PM 2357 static int gve_suspend(struct pci_dev *pdev, pm_message_t state) 2358 { 2359 struct net_device *netdev = pci_get_drvdata(pdev); 2360 struct gve_priv *priv = netdev_priv(netdev); 2361 bool was_up = netif_carrier_ok(priv->dev); 2362 2363 priv->suspend_cnt++; 2364 rtnl_lock(); 2365 if (was_up && gve_close(priv->dev)) { 2366 /* If the dev was up, attempt to close, if close fails, reset */ 2367 gve_reset_and_teardown(priv, was_up); 2368 } else { 2369 /* If the dev wasn't up or close worked, finish tearing down */ 2370 gve_teardown_priv_resources(priv); 2371 } 2372 priv->up_before_suspend = was_up; 2373 rtnl_unlock(); 2374 return 0; 2375 } 2376 2377 static int gve_resume(struct pci_dev *pdev) 2378 { 2379 struct net_device *netdev = pci_get_drvdata(pdev); 2380 struct gve_priv *priv = netdev_priv(netdev); 2381 int err; 2382 2383 priv->resume_cnt++; 2384 rtnl_lock(); 2385 err = gve_reset_recovery(priv, priv->up_before_suspend); 2386 rtnl_unlock(); 2387 return err; 2388 } 2389 #endif /* CONFIG_PM */ 2390 2391 static const struct pci_device_id gve_id_table[] = { 2392 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) }, 2393 { } 2394 }; 2395 2396 static struct pci_driver gvnic_driver = { 2397 .name = "gvnic", 2398 .id_table = gve_id_table, 2399 .probe = gve_probe, 2400 .remove = gve_remove, 2401 .shutdown = gve_shutdown, 2402 #ifdef CONFIG_PM 2403 .suspend = gve_suspend, 2404 .resume = gve_resume, 2405 #endif 2406 }; 2407 2408 module_pci_driver(gvnic_driver); 2409 2410 MODULE_DEVICE_TABLE(pci, gve_id_table); 2411 MODULE_AUTHOR("Google, Inc."); 2412 MODULE_DESCRIPTION("gVNIC Driver"); 2413 MODULE_LICENSE("Dual MIT/GPL"); 2414 MODULE_VERSION(GVE_VERSION); 2415