// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <net/sch_generic.h>
#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"

#define GVE_DEFAULT_RX_COPYBREAK	(256)

#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION		"1.0.0"
#define GVE_VERSION_PREFIX	"GVE-"

// Minimum amount of time between queue kicks in msec (10 seconds)
#define MIN_TX_TIMEOUT_GAP (1000 * 10)
#define DQO_TX_MAX	0x3FFFF

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

static int gve_verify_driver_compatibility(struct gve_priv *priv)
{
	int err;
	struct gve_driver_info *driver_info;
	dma_addr_t driver_info_bus;

	driver_info = dma_alloc_coherent(&priv->pdev->dev,
					 sizeof(struct gve_driver_info),
					 &driver_info_bus, GFP_KERNEL);
	if (!driver_info)
		return -ENOMEM;

	*driver_info = (struct gve_driver_info) {
		.os_type = 1, /* Linux */
		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
		.os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
		.os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
		.driver_capability_flags = {
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
		},
	};
	strscpy(driver_info->os_version_str1, utsname()->release,
		sizeof(driver_info->os_version_str1));
	strscpy(driver_info->os_version_str2, utsname()->version,
		sizeof(driver_info->os_version_str2));

	err = gve_adminq_verify_driver_compatibility(priv,
						     sizeof(struct gve_driver_info),
						     driver_info_bus);

	/* It's ok if the device doesn't support this */
	if (err == -EOPNOTSUPP)
		err = 0;

	dma_free_coherent(&priv->pdev->dev,
			  sizeof(struct gve_driver_info),
			  driver_info, driver_info_bus);
	return err;
}

static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (gve_is_gqi(priv))
		return gve_tx(skb, dev);
	else
		return gve_tx_dqo(skb, dev);
}

static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	u64 packets, bytes;
	int num_tx_queues;
	int ring;

	num_tx_queues = gve_num_tx_queues(priv);
	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				packets = priv->rx[ring].rpackets;
				bytes = priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
			s->rx_packets += packets;
			s->rx_bytes += bytes;
		}
	}
	if (priv->tx) {
		for (ring = 0; ring < num_tx_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->tx[ring].statss);
				packets = priv->tx[ring].pkt_done;
				bytes = priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
						       start));
			s->tx_packets += packets;
			s->tx_bytes += bytes;
		}
	}
}

static int gve_alloc_counter_array(struct gve_priv *priv)
{
	priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
	if (!priv->counter_array)
		return -ENOMEM;

	return 0;
}

static void gve_free_counter_array(struct gve_priv *priv)
{
	if (!priv->counter_array)
		return;

	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
	priv->counter_array = NULL;
}

/* NIC requests to report stats */
static void gve_stats_report_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     stats_report_task);
	if (gve_get_do_report_stats(priv)) {
		gve_handle_report_stats(priv);
		gve_clear_do_report_stats(priv);
	}
}

static void gve_stats_report_schedule(struct gve_priv *priv)
{
	if (!gve_get_probe_in_progress(priv) &&
	    !gve_get_reset_in_progress(priv)) {
		gve_set_do_report_stats(priv);
		queue_work(priv->gve_wq, &priv->stats_report_task);
	}
}

static void gve_stats_report_timer(struct timer_list *t)
{
	struct gve_priv *priv = from_timer(priv, t, stats_report_timer);

	mod_timer(&priv->stats_report_timer,
		  round_jiffies(jiffies +
		  msecs_to_jiffies(priv->stats_report_timer_period)));
	gve_stats_report_schedule(priv);
}

static int gve_alloc_stats_report(struct gve_priv *priv)
{
	int tx_stats_num, rx_stats_num;

	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
		       gve_num_tx_queues(priv);
	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
		       priv->rx_cfg.num_queues;
	priv->stats_report_len = struct_size(priv->stats_report, stats,
					     tx_stats_num + rx_stats_num);
	priv->stats_report =
		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
				   &priv->stats_report_bus, GFP_KERNEL);
	if (!priv->stats_report)
		return -ENOMEM;
	/* Set up timer for the report-stats task */
	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
	return 0;
}

static void gve_free_stats_report(struct gve_priv *priv)
{
	if (!priv->stats_report)
		return;

	del_timer_sync(&priv->stats_report_timer);
	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
			  priv->stats_report, priv->stats_report_bus);
	priv->stats_report = NULL;
}

static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
{
	struct gve_priv *priv = arg;

	queue_work(priv->gve_wq, &priv->service_task);
	return IRQ_HANDLED;
}

static irqreturn_t gve_intr(int irq, void *arg)
{
	struct gve_notify_block *block = arg;
	struct gve_priv *priv = block->priv;

	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}

static irqreturn_t gve_intr_dqo(int irq, void *arg)
{
	struct gve_notify_block *block = arg;

	/* Interrupts are automatically masked */
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}
static int gve_napi_poll(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block;
	__be32 __iomem *irq_doorbell;
	bool reschedule = false;
	struct gve_priv *priv;
	int work_done = 0;

	block = container_of(napi, struct gve_notify_block, napi);
	priv = block->priv;

	if (block->tx) {
		if (block->tx->q_num < priv->tx_cfg.num_queues)
			reschedule |= gve_tx_poll(block, budget);
		else
			reschedule |= gve_xdp_poll(block, budget);
	}

	if (block->rx) {
		work_done = gve_rx_poll(block, budget);
		reschedule |= work_done == budget;
	}

	if (reschedule)
		return budget;

	/* Complete processing - don't unmask irq if busy polling is enabled */
	if (likely(napi_complete_done(napi, work_done))) {
		irq_doorbell = gve_irq_doorbell(priv, block);
		iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);

		/* Ensure IRQ ACK is visible before we check pending work.
		 * If queue had issued updates, it would be truly visible.
		 */
		mb();

		if (block->tx)
			reschedule |= gve_tx_clean_pending(priv, block->tx);
		if (block->rx)
			reschedule |= gve_rx_work_pending(block->rx);

		if (reschedule && napi_reschedule(napi))
			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
	}
	return work_done;
}

static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block =
		container_of(napi, struct gve_notify_block, napi);
	struct gve_priv *priv = block->priv;
	bool reschedule = false;
	int work_done = 0;

	if (block->tx)
		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);

	if (block->rx) {
		work_done = gve_rx_poll_dqo(block, budget);
		reschedule |= work_done == budget;
	}

	if (reschedule)
		return budget;

	if (likely(napi_complete_done(napi, work_done))) {
		/* Enable interrupts again.
		 *
		 * We don't need to repoll afterwards because HW supports the
		 * PCI MSI-X PBA feature.
		 *
		 * Another interrupt would be triggered if a new event came in
		 * since the last one.
		 */
		gve_write_irq_doorbell_dqo(priv, block,
					   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
	}

	return work_done;
}

static int gve_alloc_notify_blocks(struct gve_priv *priv)
{
	int num_vecs_requested = priv->num_ntfy_blks + 1;
	unsigned int active_cpus;
	int vecs_enabled;
	int i, j;
	int err;

	priv->msix_vectors = kvcalloc(num_vecs_requested,
				      sizeof(*priv->msix_vectors), GFP_KERNEL);
	if (!priv->msix_vectors)
		return -ENOMEM;
	for (i = 0; i < num_vecs_requested; i++)
		priv->msix_vectors[i].entry = i;
	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
					     GVE_MIN_MSIX, num_vecs_requested);
	if (vecs_enabled < 0) {
		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
			GVE_MIN_MSIX, vecs_enabled);
		err = vecs_enabled;
		goto abort_with_msix_vectors;
	}
	if (vecs_enabled != num_vecs_requested) {
		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
		int vecs_per_type = new_num_ntfy_blks / 2;
		int vecs_left = new_num_ntfy_blks % 2;

		priv->num_ntfy_blks = new_num_ntfy_blks;
		priv->mgmt_msix_idx = priv->num_ntfy_blks;
		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
						vecs_per_type);
		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
						vecs_per_type + vecs_left);
		dev_err(&priv->pdev->dev,
			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
			vecs_enabled, priv->tx_cfg.max_queues,
			priv->rx_cfg.max_queues);
		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	}
	/* Half the notification blocks go to TX and half to RX */
	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());

	/* Setup Management Vector - the last vector */
	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
		 pci_name(priv->pdev));
	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
	if (err) {
		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
		goto abort_with_msix_enabled;
	}
	priv->irq_db_indices =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_ntfy_blks *
				   sizeof(*priv->irq_db_indices),
				   &priv->irq_db_indices_bus, GFP_KERNEL);
	if (!priv->irq_db_indices) {
		err = -ENOMEM;
		goto abort_with_mgmt_vector;
	}

	priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
				     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
	if (!priv->ntfy_blocks) {
		err = -ENOMEM;
		goto abort_with_irq_db_indices;
	}

	/* Setup the other blocks - the first n-1 vectors */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
			 i, pci_name(priv->pdev));
		block->priv = priv;
		err = request_irq(priv->msix_vectors[msix_idx].vector,
				  gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
				  0, block->name, block);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to receive msix vector %d\n", i);
			goto abort_with_some_ntfy_blocks;
		}
		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      get_cpu_mask(i % active_cpus));
		block->irq_db_index = &priv->irq_db_indices[i].index;
	}
	return 0;
abort_with_some_ntfy_blocks:
	for (j = 0; j < i; j++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[j];
		int msix_idx = j;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	kvfree(priv->ntfy_blocks);
	priv->ntfy_blocks = NULL;
abort_with_irq_db_indices:
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->irq_db_indices),
			  priv->irq_db_indices, priv->irq_db_indices_bus);
	priv->irq_db_indices = NULL;
abort_with_mgmt_vector:
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
abort_with_msix_enabled:
	pci_disable_msix(priv->pdev);
abort_with_msix_vectors:
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
	return err;
}

static void gve_free_notify_blocks(struct gve_priv *priv)
{
	int i;

	if (!priv->msix_vectors)
		return;

	/* Free the irqs */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
	kvfree(priv->ntfy_blocks);
	priv->ntfy_blocks = NULL;
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->irq_db_indices),
			  priv->irq_db_indices, priv->irq_db_indices_bus);
	priv->irq_db_indices = NULL;
	pci_disable_msix(priv->pdev);
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
}

static int gve_setup_device_resources(struct gve_priv *priv)
{
	int err;

	err = gve_alloc_counter_array(priv);
	if (err)
		return err;
	err = gve_alloc_notify_blocks(priv);
	if (err)
		goto abort_with_counter;
	err = gve_alloc_stats_report(priv);
	if (err)
		goto abort_with_ntfy_blocks;
	err = gve_adminq_configure_device_resources(priv,
						    priv->counter_array_bus,
						    priv->num_event_counters,
						    priv->irq_db_indices_bus,
						    priv->num_ntfy_blks);
	if (unlikely(err)) {
		dev_err(&priv->pdev->dev,
			"could not setup device_resources: err=%d\n", err);
		err = -ENXIO;
		goto abort_with_stats_report;
	}

	if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
					       GFP_KERNEL);
		if (!priv->ptype_lut_dqo) {
			err = -ENOMEM;
			goto abort_with_stats_report;
		}
		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to get ptype map: err=%d\n", err);
			goto abort_with_ptype_lut;
		}
	}

	err = gve_adminq_report_stats(priv, priv->stats_report_len,
				      priv->stats_report_bus,
				      GVE_STATS_REPORT_TIMER_PERIOD);
	if (err)
		dev_err(&priv->pdev->dev,
			"Failed to report stats: err=%d\n", err);
	gve_set_device_resources_ok(priv);
	return 0;

abort_with_ptype_lut:
	kvfree(priv->ptype_lut_dqo);
	priv->ptype_lut_dqo = NULL;
abort_with_stats_report:
	gve_free_stats_report(priv);
abort_with_ntfy_blocks:
	gve_free_notify_blocks(priv);
abort_with_counter:
	gve_free_counter_array(priv);

	return err;
}

static void gve_trigger_reset(struct gve_priv *priv);

static void gve_teardown_device_resources(struct gve_priv *priv)
{
	int err;

	/* Tell device its resources are being freed */
	if (gve_get_device_resources_ok(priv)) {
		/* detach the stats report */
		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to detach stats report: err=%d\n", err);
			gve_trigger_reset(priv);
		}
		err = gve_adminq_deconfigure_device_resources(priv);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Could not deconfigure device resources: err=%d\n",
				err);
			gve_trigger_reset(priv);
		}
	}

	kvfree(priv->ptype_lut_dqo);
	priv->ptype_lut_dqo = NULL;

	gve_free_counter_array(priv);
	gve_free_notify_blocks(priv);
	gve_free_stats_report(priv);
	gve_clear_device_resources_ok(priv);
}

static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
			 int (*gve_poll)(struct napi_struct *, int))
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_add(priv->dev, &block->napi, gve_poll);
}

static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_del(&block->napi);
}

static int gve_register_xdp_qpls(struct gve_priv *priv)
{
	int start_id;
	int err;
	int i;

	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}
	return 0;
}

static int gve_register_qpls(struct gve_priv *priv)
{
	int start_id;
	int err;
	int i;

	start_id = gve_tx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}

	start_id = gve_rx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}
	return 0;
}

static int gve_unregister_xdp_qpls(struct gve_priv *priv)
{
	int start_id;
	int err;
	int i;

	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}
	return 0;
}

static int gve_unregister_qpls(struct gve_priv *priv)
{
	int start_id;
	int err;
	int i;

	start_id = gve_tx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}

	start_id = gve_rx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}
	return 0;
}

static int gve_create_xdp_rings(struct gve_priv *priv)
{
	int err;

	err = gve_adminq_create_tx_queues(priv,
					  gve_xdp_tx_start_queue_id(priv),
					  priv->num_xdp_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
			  priv->num_xdp_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
		  priv->num_xdp_queues);

	return 0;
}

static int gve_create_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int err;
	int i;

	err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
			  num_tx_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
		  num_tx_queues);

	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
			  priv->rx_cfg.num_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
		  priv->rx_cfg.num_queues);

	if (gve_is_gqi(priv)) {
		/* Rx data ring has been prefilled with packet buffers at queue
		 * allocation time.
		 *
		 * Write the doorbell to provide descriptor slots and packet
		 * buffers to the NIC.
		 */
		for (i = 0; i < priv->rx_cfg.num_queues; i++)
			gve_rx_write_doorbell(priv, &priv->rx[i]);
	} else {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			/* Post buffers and ring doorbell. */
			gve_rx_post_buffers_dqo(&priv->rx[i]);
		}
	}

	return 0;
}

static void add_napi_init_xdp_sync_stats(struct gve_priv *priv,
					 int (*napi_poll)(struct napi_struct *napi,
							  int budget))
{
	int start_id = gve_xdp_tx_start_queue_id(priv);
	int i;

	/* Add xdp tx napi & init sync stats*/
	for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->tx[i].statss);
		priv->tx[i].ntfy_id = ntfy_idx;
		gve_add_napi(priv, ntfy_idx, napi_poll);
	}
}

static void add_napi_init_sync_stats(struct gve_priv *priv,
				     int (*napi_poll)(struct napi_struct *napi,
						      int budget))
{
	int i;

	/* Add tx napi & init sync stats*/
	for (i = 0; i < gve_num_tx_queues(priv); i++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->tx[i].statss);
		priv->tx[i].ntfy_id = ntfy_idx;
		gve_add_napi(priv, ntfy_idx, napi_poll);
	}
	/* Add rx napi & init sync stats*/
	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->rx[i].statss);
		priv->rx[i].ntfy_id = ntfy_idx;
		gve_add_napi(priv, ntfy_idx, napi_poll);
	}
}

static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings)
{
	if (gve_is_gqi(priv)) {
		gve_tx_free_rings_gqi(priv, start_id, num_rings);
	} else {
		gve_tx_free_rings_dqo(priv);
	}
}

static int gve_alloc_xdp_rings(struct gve_priv *priv)
{
	int start_id;
	int err = 0;

	if (!priv->num_xdp_queues)
		return 0;

	start_id = gve_xdp_tx_start_queue_id(priv);
	err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues);
	if (err)
		return err;
	add_napi_init_xdp_sync_stats(priv, gve_napi_poll);

	return 0;
}

static int gve_alloc_rings(struct gve_priv *priv)
{
	int err;

	/* Setup tx rings */
	priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx),
			    GFP_KERNEL);
	if (!priv->tx)
		return -ENOMEM;

	if (gve_is_gqi(priv))
		err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv));
	else
		err = gve_tx_alloc_rings_dqo(priv);
	if (err)
		goto free_tx;

	/* Setup rx rings */
	priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx),
			    GFP_KERNEL);
	if (!priv->rx) {
		err = -ENOMEM;
		goto free_tx_queue;
	}

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_rings(priv);
	else
		err = gve_rx_alloc_rings_dqo(priv);
	if (err)
		goto free_rx;

	if (gve_is_gqi(priv))
		add_napi_init_sync_stats(priv, gve_napi_poll);
	else
		add_napi_init_sync_stats(priv, gve_napi_poll_dqo);

	return 0;

free_rx:
	kvfree(priv->rx);
	priv->rx = NULL;
free_tx_queue:
	gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv));
free_tx:
	kvfree(priv->tx);
	priv->tx = NULL;
	return err;
}

static int gve_destroy_xdp_rings(struct gve_priv *priv)
{
	int start_id;
	int err;

	start_id = gve_xdp_tx_start_queue_id(priv);
	err = gve_adminq_destroy_tx_queues(priv,
					   start_id,
					   priv->num_xdp_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy XDP queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");

	return 0;
}
static int gve_destroy_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int err;

	err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy tx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy rx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
	return 0;
}

static void gve_rx_free_rings(struct gve_priv *priv)
{
	if (gve_is_gqi(priv))
		gve_rx_free_rings_gqi(priv);
	else
		gve_rx_free_rings_dqo(priv);
}

static void gve_free_xdp_rings(struct gve_priv *priv)
{
	int ntfy_idx, start_id;
	int i;

	start_id = gve_xdp_tx_start_queue_id(priv);
	if (priv->tx) {
		for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_tx_free_rings(priv, start_id, priv->num_xdp_queues);
	}
}

static void gve_free_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int ntfy_idx;
	int i;

	if (priv->tx) {
		for (i = 0; i < num_tx_queues; i++) {
			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_tx_free_rings(priv, 0, num_tx_queues);
		kvfree(priv->tx);
		priv->tx = NULL;
	}
	if (priv->rx) {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_rx_free_rings(priv);
		kvfree(priv->rx);
		priv->rx = NULL;
	}
}

int gve_alloc_page(struct gve_priv *priv, struct device *dev,
		   struct page **page, dma_addr_t *dma,
		   enum dma_data_direction dir, gfp_t gfp_flags)
{
	*page = alloc_page(gfp_flags);
	if (!*page) {
		priv->page_alloc_fail++;
		return -ENOMEM;
	}
	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
	if (dma_mapping_error(dev, *dma)) {
		priv->dma_mapping_error++;
		put_page(*page);
		return -ENOMEM;
	}
	return 0;
}

static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
				     int pages)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int err;
	int i;

	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
		netif_err(priv, drv, priv->dev,
			  "Reached max number of registered pages %llu > %llu\n",
			  pages + priv->num_registered_pages,
			  priv->max_registered_pages);
		return -EINVAL;
	}

	qpl->id = id;
	qpl->num_entries = 0;
	qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->pages)
		return -ENOMEM;
	qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->page_buses)
		return -ENOMEM;

	for (i = 0; i < pages; i++) {
		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
				     &qpl->page_buses[i],
				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
		/* caller handles clean up */
		if (err)
			return -ENOMEM;
		qpl->num_entries++;
	}
	priv->num_registered_pages += pages;

	return 0;
}

void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
		   enum dma_data_direction dir)
{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
	if (page)
		put_page(page);
}

static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int i;

	if (!qpl->pages)
		return;
	if (!qpl->page_buses)
		goto free_pages;

	for (i = 0; i < qpl->num_entries; i++)
		gve_free_page(&priv->pdev->dev, qpl->pages[i],
			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));

	kvfree(qpl->page_buses);
	qpl->page_buses = NULL;
free_pages:
	kvfree(qpl->pages);
	qpl->pages = NULL;
	priv->num_registered_pages -= qpl->num_entries;
}

static int gve_alloc_xdp_qpls(struct gve_priv *priv)
{
	int start_id;
	int i, j;
	int err;

	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->tx_pages_per_qpl);
		if (err)
			goto free_qpls;
	}

	return 0;

free_qpls:
	for (j = start_id; j <= i; j++)
		gve_free_queue_page_list(priv, j);
	return err;
}

static int gve_alloc_qpls(struct gve_priv *priv)
{
	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
	int start_id;
	int i, j;
	int err;

	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
		return 0;

	priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
	if (!priv->qpls)
		return -ENOMEM;

	start_id = gve_tx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->tx_pages_per_qpl);
		if (err)
			goto free_qpls;
	}

	start_id = gve_rx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->rx_data_slot_cnt);
		if (err)
			goto free_qpls;
	}

	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) *
				     sizeof(unsigned long) * BITS_PER_BYTE;
	priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
					    sizeof(unsigned long), GFP_KERNEL);
	if (!priv->qpl_cfg.qpl_id_map) {
		err = -ENOMEM;
		goto free_qpls;
	}

	return 0;

free_qpls:
	for (j = 0; j <= i; j++)
		gve_free_queue_page_list(priv, j);
	kvfree(priv->qpls);
	priv->qpls = NULL;
	return err;
}

static void gve_free_xdp_qpls(struct gve_priv *priv)
{
	int start_id;
	int i;

	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++)
		gve_free_queue_page_list(priv, i);
}

static void gve_free_qpls(struct gve_priv *priv)
{
	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
	int i;

	if (!priv->qpls)
		return;

	kvfree(priv->qpl_cfg.qpl_id_map);
	priv->qpl_cfg.qpl_id_map = NULL;

	for (i = 0; i < max_queues; i++)
		gve_free_queue_page_list(priv, i);

	kvfree(priv->qpls);
	priv->qpls = NULL;
}

/* Use this to schedule a reset when the device is capable of continuing
 * to handle other requests in its current state. If it is not, do a reset
 * in thread instead.
 */
void gve_schedule_reset(struct gve_priv *priv)
{
	gve_set_do_reset(priv);
	queue_work(priv->gve_wq, &priv->service_task);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);

static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
{
	struct napi_struct *napi;
	struct gve_rx_ring *rx;
	int err = 0;
	int i, j;
	u32 tx_qid;

	if (!priv->num_xdp_queues)
		return 0;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		rx = &priv->rx[i];
		napi = &priv->ntfy_blocks[rx->ntfy_id].napi;

		err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
				       napi->napi_id);
		if (err)
			goto err;
		err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
						 MEM_TYPE_PAGE_SHARED, NULL);
		if (err)
			goto err;
		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
		if (rx->xsk_pool) {
			err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
					       napi->napi_id);
			if (err)
				goto err;
			err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
							 MEM_TYPE_XSK_BUFF_POOL, NULL);
			if (err)
				goto err;
			xsk_pool_set_rxq_info(rx->xsk_pool,
					      &rx->xsk_rxq);
		}
	}

	for (i = 0; i < priv->num_xdp_queues; i++) {
		tx_qid = gve_xdp_tx_queue_id(priv, i);
		priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
	}
	return 0;

err:
	for (j = i; j >= 0; j--) {
		rx = &priv->rx[j];
		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
			xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
			xdp_rxq_info_unreg(&rx->xsk_rxq);
	}
	return err;
}

static void gve_unreg_xdp_info(struct gve_priv *priv)
{
	int i, tx_qid;

	if (!priv->num_xdp_queues)
		return;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		struct gve_rx_ring *rx = &priv->rx[i];

		xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (rx->xsk_pool) {
			xdp_rxq_info_unreg(&rx->xsk_rxq);
			rx->xsk_pool = NULL;
		}
	}

	for (i = 0; i < priv->num_xdp_queues; i++) {
		tx_qid = gve_xdp_tx_queue_id(priv, i);
		priv->tx[tx_qid].xsk_pool = NULL;
	}
}

static void gve_drain_page_cache(struct gve_priv *priv)
{
	struct page_frag_cache *nc;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		nc = &priv->rx[i].page_cache;
		if (nc->va) {
			__page_frag_cache_drain(virt_to_page(nc->va),
						nc->pagecnt_bias);
			nc->va = NULL;
		}
	}
}

static int gve_open(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	if (priv->xdp_prog)
		priv->num_xdp_queues = priv->rx_cfg.num_queues;
	else
		priv->num_xdp_queues = 0;

	err = gve_alloc_qpls(priv);
	if (err)
		return err;

	err = gve_alloc_rings(priv);
	if (err)
		goto free_qpls;

	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
	if (err)
		goto free_rings;
	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
	if (err)
		goto free_rings;

	err = gve_reg_xdp_info(priv, dev);
	if (err)
		goto free_rings;

	err = gve_register_qpls(priv);
	if (err)
		goto reset;

	if (!gve_is_gqi(priv)) {
		/* Hard code this for now. This may be tuned in the future for
		 * performance.
		 */
		priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
	}
	err = gve_create_rings(priv);
	if (err)
		goto reset;

	gve_set_device_rings_ok(priv);

	if (gve_get_report_stats(priv))
		mod_timer(&priv->stats_report_timer,
			  round_jiffies(jiffies +
				msecs_to_jiffies(priv->stats_report_timer_period)));

	gve_turnup(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	priv->interface_up_cnt++;
	return 0;

free_rings:
	gve_free_rings(priv);
free_qpls:
	gve_free_qpls(priv);
	return err;

reset:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	/* if this fails there is nothing we can do so just ignore the return */
	gve_reset_recovery(priv, false);
	/* return the original error */
	return err;
}

static int gve_close(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	netif_carrier_off(dev);
	if (gve_get_device_rings_ok(priv)) {
		gve_turndown(priv);
		gve_drain_page_cache(priv);
		err = gve_destroy_rings(priv);
		if (err)
			goto err;
		err = gve_unregister_qpls(priv);
		if (err)
			goto err;
		gve_clear_device_rings_ok(priv);
	}
	del_timer_sync(&priv->stats_report_timer);

	gve_unreg_xdp_info(priv);
	gve_free_rings(priv);
	gve_free_qpls(priv);
	priv->interface_down_cnt++;
	return 0;

err:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	return gve_reset_recovery(priv, false);
}

static int gve_remove_xdp_queues(struct gve_priv *priv)
{
	int err;

	err = gve_destroy_xdp_rings(priv);
	if (err)
		return err;

	err = gve_unregister_xdp_qpls(priv);
	if (err)
		return err;

	gve_unreg_xdp_info(priv);
	gve_free_xdp_rings(priv);
	gve_free_xdp_qpls(priv);
	priv->num_xdp_queues = 0;
	return 0;
}

static int gve_add_xdp_queues(struct gve_priv *priv)
{
	int err;

	priv->num_xdp_queues = priv->tx_cfg.num_queues;

	err = gve_alloc_xdp_qpls(priv);
	if (err)
		goto err;

	err = gve_alloc_xdp_rings(priv);
	if (err)
		goto free_xdp_qpls;

	err = gve_reg_xdp_info(priv, priv->dev);
	if (err)
		goto free_xdp_rings;

	err = gve_register_xdp_qpls(priv);
	if (err)
		goto free_xdp_rings;

	err = gve_create_xdp_rings(priv);
	if (err)
		goto free_xdp_rings;

	return 0;

free_xdp_rings:
	gve_free_xdp_rings(priv);
free_xdp_qpls:
	gve_free_xdp_qpls(priv);
err:
	priv->num_xdp_queues = 0;
	return err;
}

static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
{
	if (!gve_get_napi_enabled(priv))
		return;

	if (link_status == netif_carrier_ok(priv->dev))
		return;

	if (link_status) {
		netdev_info(priv->dev, "Device link is up.\n");
		netif_carrier_on(priv->dev);
	} else {
		netdev_info(priv->dev, "Device link is down.\n");
		netif_carrier_off(priv->dev);
	}
}

static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
		       struct netlink_ext_ack *extack)
{
	struct bpf_prog *old_prog;
	int err = 0;
	u32 status;

	old_prog = READ_ONCE(priv->xdp_prog);
	if (!netif_carrier_ok(priv->dev)) {
		WRITE_ONCE(priv->xdp_prog, prog);
		if (old_prog)
			bpf_prog_put(old_prog);
		return 0;
	}

	gve_turndown(priv);
	if (!old_prog && prog) {
		// Allocate XDP TX queues if an XDP program is
		// being installed
		err = gve_add_xdp_queues(priv);
		if (err)
			goto out;
	} else if (old_prog && !prog) {
		// Remove XDP TX queues if an XDP program is
		// being uninstalled
		err = gve_remove_xdp_queues(priv);
		if (err)
			goto out;
	}
	WRITE_ONCE(priv->xdp_prog, prog);
	if (old_prog)
		bpf_prog_put(old_prog);

out:
	gve_turnup(priv);
	status = ioread32be(&priv->reg_bar0->device_status);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
	return err;
}

static int gve_xsk_pool_enable(struct net_device *dev,
			       struct xsk_buff_pool *pool,
			       u16 qid)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct napi_struct *napi;
	struct gve_rx_ring *rx;
	int tx_qid;
	int err;

	if (qid >= priv->rx_cfg.num_queues) {
		dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
		return -EINVAL;
	}
	if (xsk_pool_get_rx_frame_size(pool) <
	    priv->dev->max_mtu + sizeof(struct ethhdr)) {
		dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
		return -EINVAL;
	}

	err = xsk_pool_dma_map(pool, &priv->pdev->dev,
			       DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	if (err)
		return err;

	/* If XDP prog is not installed, return */
	if (!priv->xdp_prog)
		return 0;

	rx = &priv->rx[qid];
	napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
	err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
	if (err)
		goto err;

	err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
					 MEM_TYPE_XSK_BUFF_POOL, NULL);
	if (err)
		goto err;

	xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
	rx->xsk_pool = pool;

	tx_qid = gve_xdp_tx_queue_id(priv, qid);
	priv->tx[tx_qid].xsk_pool = pool;

	return 0;
err:
	if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
		xdp_rxq_info_unreg(&rx->xsk_rxq);

	xsk_pool_dma_unmap(pool,
			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	return err;
}

static int gve_xsk_pool_disable(struct net_device *dev,
				u16 qid)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct napi_struct *napi_rx;
	struct napi_struct *napi_tx;
	struct xsk_buff_pool *pool;
	int tx_qid;

	pool = xsk_get_pool_from_qid(dev, qid);
	if (!pool)
		return -EINVAL;
	if (qid >= priv->rx_cfg.num_queues)
		return -EINVAL;

	/* If XDP prog is not installed, unmap DMA and return */
	if (!priv->xdp_prog)
		goto done;

	tx_qid = gve_xdp_tx_queue_id(priv, qid);
	if (!netif_running(dev)) {
		priv->rx[qid].xsk_pool = NULL;
		xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
		priv->tx[tx_qid].xsk_pool = NULL;
		goto done;
	}

	napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
	napi_disable(napi_rx); /* make sure current rx poll is done */

	napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
	napi_disable(napi_tx); /* make sure current tx poll is done */

	priv->rx[qid].xsk_pool = NULL;
	xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
	priv->tx[tx_qid].xsk_pool = NULL;
	smp_mb(); /* Make sure it is visible to the workers on datapath */

	napi_enable(napi_rx);
	if (gve_rx_work_pending(&priv->rx[qid]))
		napi_schedule(napi_rx);

	napi_enable(napi_tx);
	if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
		napi_schedule(napi_tx);

done:
	xsk_pool_dma_unmap(pool,
			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	return 0;
}

static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
	struct gve_priv *priv = netdev_priv(dev);
	int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);

	if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
		return -EINVAL;

	if (flags & XDP_WAKEUP_TX) {
		struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
		struct napi_struct *napi =
			&priv->ntfy_blocks[tx->ntfy_id].napi;

		if (!napi_if_scheduled_mark_missed(napi)) {
			/* Call local_bh_enable to trigger SoftIRQ processing */
			local_bh_disable();
			napi_schedule(napi);
			local_bh_enable();
		}

		tx->xdp_xsk_wakeup++;
	}

	return 0;
}

static int verify_xdp_configuration(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (dev->features & NETIF_F_LRO) {
		netdev_warn(dev, "XDP is not supported when LRO is on.\n");
		return -EOPNOTSUPP;
	}

	if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
		netdev_warn(dev, "XDP is not supported in mode %d.\n",
			    priv->queue_format);
		return -EOPNOTSUPP;
	}

	if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) {
		netdev_warn(dev, "XDP is not supported for mtu %d.\n",
			    dev->mtu);
		return -EOPNOTSUPP;
	}

	if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
	    (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
		netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
			    priv->rx_cfg.num_queues,
			    priv->tx_cfg.num_queues,
			    priv->tx_cfg.max_queues);
		return -EINVAL;
	}
	return 0;
}

static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	err = verify_xdp_configuration(dev);
	if (err)
		return err;
	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return gve_set_xdp(priv, xdp->prog, xdp->extack);
	case XDP_SETUP_XSK_POOL:
		if (xdp->xsk.pool)
			return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
		else
			return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
	default:
		return -EINVAL;
	}
}

int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_queue_config new_rx_config,
		      struct gve_queue_config new_tx_config)
{
	int err;

	if (netif_carrier_ok(priv->dev)) {
		/* To make this process as simple as possible we teardown the
		 * device, set the new configuration, and then bring the device
		 * up again.
		 */
		err = gve_close(priv->dev);
		/* we have already tried to reset in close,
		 * just fail at this point
		 */
		if (err)
			return err;
		priv->tx_cfg = new_tx_config;
		priv->rx_cfg = new_rx_config;

		err = gve_open(priv->dev);
		if (err)
			goto err;

		return 0;
	}
	/* Set the config for the next up. */
	priv->tx_cfg = new_tx_config;
	priv->rx_cfg = new_rx_config;

	return 0;
err:
	netif_err(priv, drv, priv->dev,
		  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
DISABLING ALL QUEUES !!!\n"); 1713 gve_turndown(priv); 1714 return err; 1715 } 1716 1717 static void gve_turndown(struct gve_priv *priv) 1718 { 1719 int idx; 1720 1721 if (netif_carrier_ok(priv->dev)) 1722 netif_carrier_off(priv->dev); 1723 1724 if (!gve_get_napi_enabled(priv)) 1725 return; 1726 1727 /* Disable napi to prevent more work from coming in */ 1728 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1729 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1730 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1731 1732 napi_disable(&block->napi); 1733 } 1734 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1735 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1736 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1737 1738 napi_disable(&block->napi); 1739 } 1740 1741 /* Stop tx queues */ 1742 netif_tx_disable(priv->dev); 1743 1744 gve_clear_napi_enabled(priv); 1745 gve_clear_report_stats(priv); 1746 } 1747 1748 static void gve_turnup(struct gve_priv *priv) 1749 { 1750 int idx; 1751 1752 /* Start the tx queues */ 1753 netif_tx_start_all_queues(priv->dev); 1754 1755 /* Enable napi and unmask interrupts for all queues */ 1756 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1757 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1758 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1759 1760 napi_enable(&block->napi); 1761 if (gve_is_gqi(priv)) { 1762 iowrite32be(0, gve_irq_doorbell(priv, block)); 1763 } else { 1764 gve_set_itr_coalesce_usecs_dqo(priv, block, 1765 priv->tx_coalesce_usecs); 1766 } 1767 } 1768 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1769 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1770 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1771 1772 napi_enable(&block->napi); 1773 if (gve_is_gqi(priv)) { 1774 iowrite32be(0, gve_irq_doorbell(priv, block)); 1775 } else { 1776 gve_set_itr_coalesce_usecs_dqo(priv, block, 1777 priv->rx_coalesce_usecs); 1778 } 1779 } 1780 1781 gve_set_napi_enabled(priv); 1782 } 1783 1784 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 1785 { 1786 struct gve_notify_block *block; 1787 struct gve_tx_ring *tx = NULL; 1788 struct gve_priv *priv; 1789 u32 last_nic_done; 1790 u32 current_time; 1791 u32 ntfy_idx; 1792 1793 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 1794 priv = netdev_priv(dev); 1795 if (txqueue > priv->tx_cfg.num_queues) 1796 goto reset; 1797 1798 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 1799 if (ntfy_idx >= priv->num_ntfy_blks) 1800 goto reset; 1801 1802 block = &priv->ntfy_blocks[ntfy_idx]; 1803 tx = block->tx; 1804 1805 current_time = jiffies_to_msecs(jiffies); 1806 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 1807 goto reset; 1808 1809 /* Check to see if there are missed completions, which will allow us to 1810 * kick the queue. 1811 */ 1812 last_nic_done = gve_tx_load_event_counter(priv, tx); 1813 if (last_nic_done - tx->done) { 1814 netdev_info(dev, "Kicking queue %d", txqueue); 1815 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 1816 napi_schedule(&block->napi); 1817 tx->last_kick_msec = current_time; 1818 goto out; 1819 } // Else reset. 

reset:
	gve_schedule_reset(priv);

out:
	if (tx)
		tx->queue_timeout++;
	priv->tx_timeo_cnt++;
}

static int gve_set_features(struct net_device *netdev,
			    netdev_features_t features)
{
	const netdev_features_t orig_features = netdev->features;
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
		netdev->features ^= NETIF_F_LRO;
		if (netif_carrier_ok(netdev)) {
			/* To make this process as simple as possible we
			 * teardown the device, set the new configuration,
			 * and then bring the device up again.
			 */
			err = gve_close(netdev);
			/* We have already tried to reset in close, just fail
			 * at this point.
			 */
			if (err)
				goto err;

			err = gve_open(netdev);
			if (err)
				goto err;
		}
	}

	return 0;
err:
	/* Reverts the change on error. */
	netdev->features = orig_features;
	netif_err(priv, drv, netdev,
		  "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
	return err;
}

static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit = gve_start_xmit,
	.ndo_open = gve_open,
	.ndo_stop = gve_close,
	.ndo_get_stats64 = gve_get_stats,
	.ndo_tx_timeout = gve_tx_timeout,
	.ndo_set_features = gve_set_features,
	.ndo_bpf = gve_xdp,
	.ndo_xdp_xmit = gve_xdp_xmit,
	.ndo_xsk_wakeup = gve_xsk_wakeup,
};

static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
		priv->stats_report_trigger_cnt++;
		gve_set_do_report_stats(priv);
	}
}

static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		gve_reset(priv, false);
		rtnl_unlock();
	}
}

void gve_handle_report_stats(struct gve_priv *priv)
{
	struct stats *stats = priv->stats_report->stats;
	int idx, stats_idx = 0;
	unsigned int start = 0;
	u64 tx_bytes;

	if (!gve_get_report_stats(priv))
		return;

	be64_add_cpu(&priv->stats_report->written_count, 1);
	/* tx stats */
	if (priv->tx) {
		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
			u32 last_completion = 0;
			u32 tx_frames = 0;

			/* DQO doesn't currently support these metrics. */
			if (gve_is_gqi(priv)) {
				last_completion = priv->tx[idx].done;
				tx_frames = priv->tx[idx].req;
			}

			do {
				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
				tx_bytes = priv->tx[idx].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_WAKE_CNT),
				.value = cpu_to_be64(priv->tx[idx].wake_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_STOP_CNT),
				.value = cpu_to_be64(priv->tx[idx].stop_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
				.value = cpu_to_be64(tx_frames),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_BYTES_SENT),
				.value = cpu_to_be64(tx_bytes),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
				.value = cpu_to_be64(last_completion),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
	/* rx stats */
	if (priv->rx) {
		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
				.value = cpu_to_be64(priv->rx[0].fill_cnt),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
}

/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     service_task);
	u32 status = ioread32be(&priv->reg_bar0->device_status);

	gve_handle_status(priv, status);

	gve_handle_reset(priv);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}

static void gve_set_netdev_xdp_features(struct gve_priv *priv)
{
	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
		priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
		priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
		priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
		priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
	} else {
		priv->dev->xdp_features = 0;
	}
}

static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	err = gve_verify_driver_compatibility(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not verify driver compatibility: err=%d\n", err);
		goto err;
	}

	if (skip_describe_device)
		goto setup_device;

	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	err = gve_verify_driver_compatibility(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not verify driver compatibility: err=%d\n", err);
		goto err;
	}

	if (skip_describe_device)
		goto setup_device;

	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	/* Big TCP is only supported on DQ */
	if (!gve_is_gqi(priv))
		netif_set_tso_max_size(priv->dev, DQO_TX_MAX);

	priv->num_registered_pages = 0;
	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

	if (!gve_is_gqi(priv)) {
		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
	}

setup_device:
	gve_set_netdev_xdp_features(priv);
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}

static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}

static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}

static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

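/* Editor's summary comment (describes the reset flow implemented below):
 * gve_reset() either closes the netdev gracefully and frees resources or,
 * when a graceful teardown is not attempted or close fails, turns the queues
 * down and releases the admin queue to force a device reset. Recovery then
 * re-runs gve_init_priv() with the device description step skipped and
 * reopens the interface only if it was up beforehand.
 */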
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_carrier_ok(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}

static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}

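/* Editor's summary comment (describes the probe sequence below): enable the
 * PCI device, map the register and doorbell BARs, write the driver version
 * string to the device via gve_write_version(), read the maximum TX/RX queue
 * counts, allocate the netdev sized for those queues, initialize private
 * state with gve_init_priv(), and finally register the netdev. Errors unwind
 * in reverse order through the abort_* labels.
 */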
static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	err = pci_request_regions(pdev, gve_driver_name);
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		err = -ENOMEM;
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;

	/* Set default and supported features.
	 *
	 * Features might be set in other locations as well (such as
	 * `gve_adminq_describe_device`).
	 */
	dev->hw_features = NETIF_F_HIGHDMA;
	dev->hw_features |= NETIF_F_SG;
	dev->hw_features |= NETIF_F_HW_CSUM;
	dev->hw_features |= NETIF_F_TSO;
	dev->hw_features |= NETIF_F_TSO6;
	dev->hw_features |= NETIF_F_TSO_ECN;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_RXHASH;
	dev->features = dev->hw_features;
	dev->watchdog_timeo = 5 * HZ;
	dev->min_mtu = ETH_MIN_MTU;
	netif_carrier_off(dev);

	priv = netdev_priv(dev);
	priv->dev = dev;
	priv->pdev = pdev;
	priv->msg_enable = DEFAULT_MSG_LEVEL;
	priv->reg_bar0 = reg_bar;
	priv->db_bar2 = db_bar;
	priv->service_task_flags = 0x0;
	priv->state_flags = 0x0;
	priv->ethtool_flags = 0x0;

	gve_set_probe_in_progress(priv);
	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue");
		err = -ENOMEM;
		goto abort_with_netdev;
	}
	INIT_WORK(&priv->service_task, gve_service_task);
	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
	priv->tx_cfg.max_queues = max_tx_queues;
	priv->rx_cfg.max_queues = max_rx_queues;

	err = gve_init_priv(priv, false);
	if (err)
		goto abort_with_wq;

	err = register_netdev(dev);
	if (err)
		goto abort_with_gve_init;

	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
	gve_clear_probe_in_progress(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	return 0;

abort_with_gve_init:
	gve_teardown_priv_resources(priv);

abort_with_wq:
	destroy_workqueue(priv->gve_wq);

abort_with_netdev:
	free_netdev(dev);

abort_with_db_bar:
	pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
	pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
	pci_release_regions(pdev);

abort_with_enabled:
	pci_disable_device(pdev);
	return err;
}

static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}

static void gve_shutdown(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_carrier_ok(priv->dev);

	rtnl_lock();
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	rtnl_unlock();
}

#ifdef CONFIG_PM
static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_carrier_ok(priv->dev);

	priv->suspend_cnt++;
	rtnl_lock();
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	priv->up_before_suspend = was_up;
	rtnl_unlock();
	return 0;
}

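/* Editor's summary comment: resume mirrors a reset recovery, rebuilding
 * driver state and reopening the interface only if it was up when
 * gve_suspend() ran (tracked in priv->up_before_suspend).
 */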
static int gve_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	priv->resume_cnt++;
	rtnl_lock();
	err = gve_reset_recovery(priv, priv->up_before_suspend);
	rtnl_unlock();
	return err;
}
#endif /* CONFIG_PM */

static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gve_driver = {
	.name = gve_driver_name,
	.id_table = gve_id_table,
	.probe = gve_probe,
	.remove = gve_remove,
	.shutdown = gve_shutdown,
#ifdef CONFIG_PM
	.suspend = gve_suspend,
	.resume = gve_resume,
#endif
};

module_pci_driver(gve_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("Google Virtual NIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);