1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3 *
4 * Copyright (C) 2015-2021 Google, Inc.
5 */
6
7 #include <linux/bpf.h>
8 #include <linux/cpumask.h>
9 #include <linux/etherdevice.h>
10 #include <linux/filter.h>
11 #include <linux/interrupt.h>
12 #include <linux/module.h>
13 #include <linux/pci.h>
14 #include <linux/sched.h>
15 #include <linux/timer.h>
16 #include <linux/workqueue.h>
17 #include <linux/utsname.h>
18 #include <linux/version.h>
19 #include <net/sch_generic.h>
20 #include <net/xdp_sock_drv.h>
21 #include "gve.h"
22 #include "gve_dqo.h"
23 #include "gve_adminq.h"
24 #include "gve_register.h"
25
26 #define GVE_DEFAULT_RX_COPYBREAK (256)
27
28 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
29 #define GVE_VERSION "1.0.0"
30 #define GVE_VERSION_PREFIX "GVE-"
31
32 // Minimum amount of time between queue kicks in msec (10 seconds)
33 #define MIN_TX_TIMEOUT_GAP (1000 * 10)
34
35 char gve_driver_name[] = "gve";
36 const char gve_version_str[] = GVE_VERSION;
37 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
38
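/* Report driver and OS version information to the device over the admin
 * queue. Devices that do not implement this command return -EOPNOTSUPP,
 * which is treated as success.
 */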
39 static int gve_verify_driver_compatibility(struct gve_priv *priv)
40 {
41 int err;
42 struct gve_driver_info *driver_info;
43 dma_addr_t driver_info_bus;
44
45 driver_info = dma_alloc_coherent(&priv->pdev->dev,
46 sizeof(struct gve_driver_info),
47 &driver_info_bus, GFP_KERNEL);
48 if (!driver_info)
49 return -ENOMEM;
50
51 *driver_info = (struct gve_driver_info) {
52 .os_type = 1, /* Linux */
53 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
54 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
55 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
56 .driver_capability_flags = {
57 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
58 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
59 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
60 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
61 },
62 };
63 strscpy(driver_info->os_version_str1, utsname()->release,
64 sizeof(driver_info->os_version_str1));
65 strscpy(driver_info->os_version_str2, utsname()->version,
66 sizeof(driver_info->os_version_str2));
67
68 err = gve_adminq_verify_driver_compatibility(priv,
69 sizeof(struct gve_driver_info),
70 driver_info_bus);
71
72 /* It's ok if the device doesn't support this */
73 if (err == -EOPNOTSUPP)
74 err = 0;
75
76 dma_free_coherent(&priv->pdev->dev,
77 sizeof(struct gve_driver_info),
78 driver_info, driver_info_bus);
79 return err;
80 }
81
82 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
83 {
84 struct gve_priv *priv = netdev_priv(dev);
85
86 if (gve_is_gqi(priv))
87 return gve_tx(skb, dev);
88 else
89 return gve_tx_dqo(skb, dev);
90 }
91
92 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
93 {
94 struct gve_priv *priv = netdev_priv(dev);
95 unsigned int start;
96 u64 packets, bytes;
97 int num_tx_queues;
98 int ring;
99
100 num_tx_queues = gve_num_tx_queues(priv);
101 if (priv->rx) {
102 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
103 do {
104 start =
105 u64_stats_fetch_begin(&priv->rx[ring].statss);
106 packets = priv->rx[ring].rpackets;
107 bytes = priv->rx[ring].rbytes;
108 } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
109 start));
110 s->rx_packets += packets;
111 s->rx_bytes += bytes;
112 }
113 }
114 if (priv->tx) {
115 for (ring = 0; ring < num_tx_queues; ring++) {
116 do {
117 start =
118 u64_stats_fetch_begin(&priv->tx[ring].statss);
119 packets = priv->tx[ring].pkt_done;
120 bytes = priv->tx[ring].bytes_done;
121 } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
122 start));
123 s->tx_packets += packets;
124 s->tx_bytes += bytes;
125 }
126 }
127 }
128
129 static int gve_alloc_counter_array(struct gve_priv *priv)
130 {
131 priv->counter_array =
132 dma_alloc_coherent(&priv->pdev->dev,
133 priv->num_event_counters *
134 sizeof(*priv->counter_array),
135 &priv->counter_array_bus, GFP_KERNEL);
136 if (!priv->counter_array)
137 return -ENOMEM;
138
139 return 0;
140 }
141
142 static void gve_free_counter_array(struct gve_priv *priv)
143 {
144 if (!priv->counter_array)
145 return;
146
147 dma_free_coherent(&priv->pdev->dev,
148 priv->num_event_counters *
149 sizeof(*priv->counter_array),
150 priv->counter_array, priv->counter_array_bus);
151 priv->counter_array = NULL;
152 }
153
154 /* NIC requests to report stats */
155 static void gve_stats_report_task(struct work_struct *work)
156 {
157 struct gve_priv *priv = container_of(work, struct gve_priv,
158 stats_report_task);
159 if (gve_get_do_report_stats(priv)) {
160 gve_handle_report_stats(priv);
161 gve_clear_do_report_stats(priv);
162 }
163 }
164
165 static void gve_stats_report_schedule(struct gve_priv *priv)
166 {
167 if (!gve_get_probe_in_progress(priv) &&
168 !gve_get_reset_in_progress(priv)) {
169 gve_set_do_report_stats(priv);
170 queue_work(priv->gve_wq, &priv->stats_report_task);
171 }
172 }
173
174 static void gve_stats_report_timer(struct timer_list *t)
175 {
176 struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
177
178 mod_timer(&priv->stats_report_timer,
179 round_jiffies(jiffies +
180 msecs_to_jiffies(priv->stats_report_timer_period)));
181 gve_stats_report_schedule(priv);
182 }
183
184 static int gve_alloc_stats_report(struct gve_priv *priv)
185 {
186 int tx_stats_num, rx_stats_num;
187
188 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
189 gve_num_tx_queues(priv);
190 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
191 priv->rx_cfg.num_queues;
192 priv->stats_report_len = struct_size(priv->stats_report, stats,
193 size_add(tx_stats_num, rx_stats_num));
194 priv->stats_report =
195 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
196 &priv->stats_report_bus, GFP_KERNEL);
197 if (!priv->stats_report)
198 return -ENOMEM;
199 /* Set up timer for the report-stats task */
200 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
201 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
202 return 0;
203 }
204
205 static void gve_free_stats_report(struct gve_priv *priv)
206 {
207 if (!priv->stats_report)
208 return;
209
210 del_timer_sync(&priv->stats_report_timer);
211 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
212 priv->stats_report, priv->stats_report_bus);
213 priv->stats_report = NULL;
214 }
215
216 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
217 {
218 struct gve_priv *priv = arg;
219
220 queue_work(priv->gve_wq, &priv->service_task);
221 return IRQ_HANDLED;
222 }
223
224 static irqreturn_t gve_intr(int irq, void *arg)
225 {
226 struct gve_notify_block *block = arg;
227 struct gve_priv *priv = block->priv;
228
229 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
230 napi_schedule_irqoff(&block->napi);
231 return IRQ_HANDLED;
232 }
233
234 static irqreturn_t gve_intr_dqo(int irq, void *arg)
235 {
236 struct gve_notify_block *block = arg;
237
238 /* Interrupts are automatically masked */
239 napi_schedule_irqoff(&block->napi);
240 return IRQ_HANDLED;
241 }
242
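/* NAPI poll for the GQI queue formats: services TX (or XDP TX) completions
 * and RX descriptors, then re-arms the IRQ doorbell once all pending work
 * has been handled.
 */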
243 static int gve_napi_poll(struct napi_struct *napi, int budget)
244 {
245 struct gve_notify_block *block;
246 __be32 __iomem *irq_doorbell;
247 bool reschedule = false;
248 struct gve_priv *priv;
249 int work_done = 0;
250
251 block = container_of(napi, struct gve_notify_block, napi);
252 priv = block->priv;
253
254 if (block->tx) {
255 if (block->tx->q_num < priv->tx_cfg.num_queues)
256 reschedule |= gve_tx_poll(block, budget);
257 else if (budget)
258 reschedule |= gve_xdp_poll(block, budget);
259 }
260
261 if (!budget)
262 return 0;
263
264 if (block->rx) {
265 work_done = gve_rx_poll(block, budget);
266 reschedule |= work_done == budget;
267 }
268
269 if (reschedule)
270 return budget;
271
272 /* Complete processing - don't unmask irq if busy polling is enabled */
273 if (likely(napi_complete_done(napi, work_done))) {
274 irq_doorbell = gve_irq_doorbell(priv, block);
275 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
276
277 /* Ensure IRQ ACK is visible before we check pending work.
278 * If queue had issued updates, it would be truly visible.
279 */
280 mb();
281
282 if (block->tx)
283 reschedule |= gve_tx_clean_pending(priv, block->tx);
284 if (block->rx)
285 reschedule |= gve_rx_work_pending(block->rx);
286
287 if (reschedule && napi_reschedule(napi))
288 iowrite32be(GVE_IRQ_MASK, irq_doorbell);
289 }
290 return work_done;
291 }
292
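/* NAPI poll for the DQO queue format; interrupts are re-enabled through the
 * IRQ doorbell once the budget is not exhausted.
 */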
293 static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
294 {
295 struct gve_notify_block *block =
296 container_of(napi, struct gve_notify_block, napi);
297 struct gve_priv *priv = block->priv;
298 bool reschedule = false;
299 int work_done = 0;
300
301 if (block->tx)
302 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
303
304 if (!budget)
305 return 0;
306
307 if (block->rx) {
308 work_done = gve_rx_poll_dqo(block, budget);
309 reschedule |= work_done == budget;
310 }
311
312 if (reschedule)
313 return budget;
314
315 if (likely(napi_complete_done(napi, work_done))) {
316 /* Enable interrupts again.
317 *
318 * We don't need to repoll afterwards because HW supports the
319 * PCI MSI-X PBA feature.
320 *
321 * Another interrupt would be triggered if a new event came in
322 * since the last one.
323 */
324 gve_write_irq_doorbell_dqo(priv, block,
325 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
326 }
327
328 return work_done;
329 }
330
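/* Allocate MSI-X vectors and notification blocks: one management vector plus
 * one vector per notification block. If fewer vectors are granted, the TX/RX
 * queue maximums are scaled down to fit.
 */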
331 static int gve_alloc_notify_blocks(struct gve_priv *priv)
332 {
333 int num_vecs_requested = priv->num_ntfy_blks + 1;
334 unsigned int active_cpus;
335 int vecs_enabled;
336 int i, j;
337 int err;
338
339 priv->msix_vectors = kvcalloc(num_vecs_requested,
340 sizeof(*priv->msix_vectors), GFP_KERNEL);
341 if (!priv->msix_vectors)
342 return -ENOMEM;
343 for (i = 0; i < num_vecs_requested; i++)
344 priv->msix_vectors[i].entry = i;
345 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
346 GVE_MIN_MSIX, num_vecs_requested);
347 if (vecs_enabled < 0) {
348 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
349 GVE_MIN_MSIX, vecs_enabled);
350 err = vecs_enabled;
351 goto abort_with_msix_vectors;
352 }
353 if (vecs_enabled != num_vecs_requested) {
354 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
355 int vecs_per_type = new_num_ntfy_blks / 2;
356 int vecs_left = new_num_ntfy_blks % 2;
357
358 priv->num_ntfy_blks = new_num_ntfy_blks;
359 priv->mgmt_msix_idx = priv->num_ntfy_blks;
360 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
361 vecs_per_type);
362 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
363 vecs_per_type + vecs_left);
364 dev_err(&priv->pdev->dev,
365 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
366 vecs_enabled, priv->tx_cfg.max_queues,
367 priv->rx_cfg.max_queues);
368 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
369 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
370 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
371 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
372 }
373 /* Half the notification blocks go to TX and half to RX */
374 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
375
376 /* Setup Management Vector - the last vector */
377 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
378 pci_name(priv->pdev));
379 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
380 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
381 if (err) {
382 dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
383 goto abort_with_msix_enabled;
384 }
385 priv->irq_db_indices =
386 dma_alloc_coherent(&priv->pdev->dev,
387 priv->num_ntfy_blks *
388 sizeof(*priv->irq_db_indices),
389 &priv->irq_db_indices_bus, GFP_KERNEL);
390 if (!priv->irq_db_indices) {
391 err = -ENOMEM;
392 goto abort_with_mgmt_vector;
393 }
394
395 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
396 sizeof(*priv->ntfy_blocks), GFP_KERNEL);
397 if (!priv->ntfy_blocks) {
398 err = -ENOMEM;
399 goto abort_with_irq_db_indices;
400 }
401
402 /* Setup the other blocks - the first n-1 vectors */
403 for (i = 0; i < priv->num_ntfy_blks; i++) {
404 struct gve_notify_block *block = &priv->ntfy_blocks[i];
405 int msix_idx = i;
406
407 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
408 i, pci_name(priv->pdev));
409 block->priv = priv;
410 err = request_irq(priv->msix_vectors[msix_idx].vector,
411 gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
412 0, block->name, block);
413 if (err) {
414 dev_err(&priv->pdev->dev,
415 "Failed to receive msix vector %d\n", i);
416 goto abort_with_some_ntfy_blocks;
417 }
418 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
419 get_cpu_mask(i % active_cpus));
420 block->irq_db_index = &priv->irq_db_indices[i].index;
421 }
422 return 0;
423 abort_with_some_ntfy_blocks:
424 for (j = 0; j < i; j++) {
425 struct gve_notify_block *block = &priv->ntfy_blocks[j];
426 int msix_idx = j;
427
428 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
429 NULL);
430 free_irq(priv->msix_vectors[msix_idx].vector, block);
431 }
432 kvfree(priv->ntfy_blocks);
433 priv->ntfy_blocks = NULL;
434 abort_with_irq_db_indices:
435 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
436 sizeof(*priv->irq_db_indices),
437 priv->irq_db_indices, priv->irq_db_indices_bus);
438 priv->irq_db_indices = NULL;
439 abort_with_mgmt_vector:
440 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
441 abort_with_msix_enabled:
442 pci_disable_msix(priv->pdev);
443 abort_with_msix_vectors:
444 kvfree(priv->msix_vectors);
445 priv->msix_vectors = NULL;
446 return err;
447 }
448
449 static void gve_free_notify_blocks(struct gve_priv *priv)
450 {
451 int i;
452
453 if (!priv->msix_vectors)
454 return;
455
456 /* Free the irqs */
457 for (i = 0; i < priv->num_ntfy_blks; i++) {
458 struct gve_notify_block *block = &priv->ntfy_blocks[i];
459 int msix_idx = i;
460
461 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
462 NULL);
463 free_irq(priv->msix_vectors[msix_idx].vector, block);
464 }
465 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
466 kvfree(priv->ntfy_blocks);
467 priv->ntfy_blocks = NULL;
468 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
469 sizeof(*priv->irq_db_indices),
470 priv->irq_db_indices, priv->irq_db_indices_bus);
471 priv->irq_db_indices = NULL;
472 pci_disable_msix(priv->pdev);
473 kvfree(priv->msix_vectors);
474 priv->msix_vectors = NULL;
475 }
476
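/* Allocate the event counter array, notification blocks and stats report,
 * then hand their DMA addresses to the device over the admin queue.
 */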
477 static int gve_setup_device_resources(struct gve_priv *priv)
478 {
479 int err;
480
481 err = gve_alloc_counter_array(priv);
482 if (err)
483 return err;
484 err = gve_alloc_notify_blocks(priv);
485 if (err)
486 goto abort_with_counter;
487 err = gve_alloc_stats_report(priv);
488 if (err)
489 goto abort_with_ntfy_blocks;
490 err = gve_adminq_configure_device_resources(priv,
491 priv->counter_array_bus,
492 priv->num_event_counters,
493 priv->irq_db_indices_bus,
494 priv->num_ntfy_blks);
495 if (unlikely(err)) {
496 dev_err(&priv->pdev->dev,
497 "could not setup device_resources: err=%d\n", err);
498 err = -ENXIO;
499 goto abort_with_stats_report;
500 }
501
502 if (!gve_is_gqi(priv)) {
503 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
504 GFP_KERNEL);
505 if (!priv->ptype_lut_dqo) {
506 err = -ENOMEM;
507 goto abort_with_stats_report;
508 }
509 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
510 if (err) {
511 dev_err(&priv->pdev->dev,
512 "Failed to get ptype map: err=%d\n", err);
513 goto abort_with_ptype_lut;
514 }
515 }
516
517 err = gve_adminq_report_stats(priv, priv->stats_report_len,
518 priv->stats_report_bus,
519 GVE_STATS_REPORT_TIMER_PERIOD);
520 if (err)
521 dev_err(&priv->pdev->dev,
522 "Failed to report stats: err=%d\n", err);
523 gve_set_device_resources_ok(priv);
524 return 0;
525
526 abort_with_ptype_lut:
527 kvfree(priv->ptype_lut_dqo);
528 priv->ptype_lut_dqo = NULL;
529 abort_with_stats_report:
530 gve_free_stats_report(priv);
531 abort_with_ntfy_blocks:
532 gve_free_notify_blocks(priv);
533 abort_with_counter:
534 gve_free_counter_array(priv);
535
536 return err;
537 }
538
539 static void gve_trigger_reset(struct gve_priv *priv);
540
541 static void gve_teardown_device_resources(struct gve_priv *priv)
542 {
543 int err;
544
545 /* Tell device its resources are being freed */
546 if (gve_get_device_resources_ok(priv)) {
547 /* detach the stats report */
548 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
549 if (err) {
550 dev_err(&priv->pdev->dev,
551 "Failed to detach stats report: err=%d\n", err);
552 gve_trigger_reset(priv);
553 }
554 err = gve_adminq_deconfigure_device_resources(priv);
555 if (err) {
556 dev_err(&priv->pdev->dev,
557 "Could not deconfigure device resources: err=%d\n",
558 err);
559 gve_trigger_reset(priv);
560 }
561 }
562
563 kvfree(priv->ptype_lut_dqo);
564 priv->ptype_lut_dqo = NULL;
565
566 gve_free_counter_array(priv);
567 gve_free_notify_blocks(priv);
568 gve_free_stats_report(priv);
569 gve_clear_device_resources_ok(priv);
570 }
571
572 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
573 int (*gve_poll)(struct napi_struct *, int))
574 {
575 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
576
577 netif_napi_add(priv->dev, &block->napi, gve_poll);
578 }
579
580 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
581 {
582 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
583
584 netif_napi_del(&block->napi);
585 }
586
587 static int gve_register_xdp_qpls(struct gve_priv *priv)
588 {
589 int start_id;
590 int err;
591 int i;
592
593 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
594 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
595 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
596 if (err) {
597 netif_err(priv, drv, priv->dev,
598 "failed to register queue page list %d\n",
599 priv->qpls[i].id);
600 /* This failure will trigger a reset - no need to clean
601 * up
602 */
603 return err;
604 }
605 }
606 return 0;
607 }
608
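/* Register every TX and RX queue page list with the device. A failure here
 * triggers a reset, so no unwinding is done.
 */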
609 static int gve_register_qpls(struct gve_priv *priv)
610 {
611 int start_id;
612 int err;
613 int i;
614
615 start_id = gve_tx_start_qpl_id(priv);
616 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
617 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
618 if (err) {
619 netif_err(priv, drv, priv->dev,
620 "failed to register queue page list %d\n",
621 priv->qpls[i].id);
622 /* This failure will trigger a reset - no need to clean
623 * up
624 */
625 return err;
626 }
627 }
628
629 start_id = gve_rx_start_qpl_id(priv);
630 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
631 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
632 if (err) {
633 netif_err(priv, drv, priv->dev,
634 "failed to register queue page list %d\n",
635 priv->qpls[i].id);
636 /* This failure will trigger a reset - no need to clean
637 * up
638 */
639 return err;
640 }
641 }
642 return 0;
643 }
644
645 static int gve_unregister_xdp_qpls(struct gve_priv *priv)
646 {
647 int start_id;
648 int err;
649 int i;
650
651 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
652 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
653 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
654 /* This failure will trigger a reset - no need to clean up */
655 if (err) {
656 netif_err(priv, drv, priv->dev,
657 "Failed to unregister queue page list %d\n",
658 priv->qpls[i].id);
659 return err;
660 }
661 }
662 return 0;
663 }
664
665 static int gve_unregister_qpls(struct gve_priv *priv)
666 {
667 int start_id;
668 int err;
669 int i;
670
671 start_id = gve_tx_start_qpl_id(priv);
672 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
673 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
674 /* This failure will trigger a reset - no need to clean up */
675 if (err) {
676 netif_err(priv, drv, priv->dev,
677 "Failed to unregister queue page list %d\n",
678 priv->qpls[i].id);
679 return err;
680 }
681 }
682
683 start_id = gve_rx_start_qpl_id(priv);
684 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
685 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
686 /* This failure will trigger a reset - no need to clean up */
687 if (err) {
688 netif_err(priv, drv, priv->dev,
689 "Failed to unregister queue page list %d\n",
690 priv->qpls[i].id);
691 return err;
692 }
693 }
694 return 0;
695 }
696
697 static int gve_create_xdp_rings(struct gve_priv *priv)
698 {
699 int err;
700
701 err = gve_adminq_create_tx_queues(priv,
702 gve_xdp_tx_start_queue_id(priv),
703 priv->num_xdp_queues);
704 if (err) {
705 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
706 priv->num_xdp_queues);
707 /* This failure will trigger a reset - no need to clean
708 * up
709 */
710 return err;
711 }
712 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
713 priv->num_xdp_queues);
714
715 return 0;
716 }
717
718 static int gve_create_rings(struct gve_priv *priv)
719 {
720 int num_tx_queues = gve_num_tx_queues(priv);
721 int err;
722 int i;
723
724 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
725 if (err) {
726 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
727 num_tx_queues);
728 /* This failure will trigger a reset - no need to clean
729 * up
730 */
731 return err;
732 }
733 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
734 num_tx_queues);
735
736 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
737 if (err) {
738 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
739 priv->rx_cfg.num_queues);
740 /* This failure will trigger a reset - no need to clean
741 * up
742 */
743 return err;
744 }
745 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
746 priv->rx_cfg.num_queues);
747
748 if (gve_is_gqi(priv)) {
749 /* Rx data ring has been prefilled with packet buffers at queue
750 * allocation time.
751 *
752 * Write the doorbell to provide descriptor slots and packet
753 * buffers to the NIC.
754 */
755 for (i = 0; i < priv->rx_cfg.num_queues; i++)
756 gve_rx_write_doorbell(priv, &priv->rx[i]);
757 } else {
758 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
759 /* Post buffers and ring doorbell. */
760 gve_rx_post_buffers_dqo(&priv->rx[i]);
761 }
762 }
763
764 return 0;
765 }
766
767 static void add_napi_init_xdp_sync_stats(struct gve_priv *priv,
768 int (*napi_poll)(struct napi_struct *napi,
769 int budget))
770 {
771 int start_id = gve_xdp_tx_start_queue_id(priv);
772 int i;
773
774 /* Add xdp tx napi & init sync stats*/
775 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
776 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
777
778 u64_stats_init(&priv->tx[i].statss);
779 priv->tx[i].ntfy_id = ntfy_idx;
780 gve_add_napi(priv, ntfy_idx, napi_poll);
781 }
782 }
783
784 static void add_napi_init_sync_stats(struct gve_priv *priv,
785 int (*napi_poll)(struct napi_struct *napi,
786 int budget))
787 {
788 int i;
789
790 /* Add tx napi & init sync stats*/
791 for (i = 0; i < gve_num_tx_queues(priv); i++) {
792 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
793
794 u64_stats_init(&priv->tx[i].statss);
795 priv->tx[i].ntfy_id = ntfy_idx;
796 gve_add_napi(priv, ntfy_idx, napi_poll);
797 }
798 /* Add rx napi & init sync stats*/
799 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
800 int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
801
802 u64_stats_init(&priv->rx[i].statss);
803 priv->rx[i].ntfy_id = ntfy_idx;
804 gve_add_napi(priv, ntfy_idx, napi_poll);
805 }
806 }
807
808 static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings)
809 {
810 if (gve_is_gqi(priv)) {
811 gve_tx_free_rings_gqi(priv, start_id, num_rings);
812 } else {
813 gve_tx_free_rings_dqo(priv);
814 }
815 }
816
817 static int gve_alloc_xdp_rings(struct gve_priv *priv)
818 {
819 int start_id;
820 int err = 0;
821
822 if (!priv->num_xdp_queues)
823 return 0;
824
825 start_id = gve_xdp_tx_start_queue_id(priv);
826 err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues);
827 if (err)
828 return err;
829 add_napi_init_xdp_sync_stats(priv, gve_napi_poll);
830
831 return 0;
832 }
833
834 static int gve_alloc_rings(struct gve_priv *priv)
835 {
836 int err;
837
838 /* Setup tx rings */
839 priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx),
840 GFP_KERNEL);
841 if (!priv->tx)
842 return -ENOMEM;
843
844 if (gve_is_gqi(priv))
845 err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv));
846 else
847 err = gve_tx_alloc_rings_dqo(priv);
848 if (err)
849 goto free_tx;
850
851 /* Setup rx rings */
852 priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx),
853 GFP_KERNEL);
854 if (!priv->rx) {
855 err = -ENOMEM;
856 goto free_tx_queue;
857 }
858
859 if (gve_is_gqi(priv))
860 err = gve_rx_alloc_rings(priv);
861 else
862 err = gve_rx_alloc_rings_dqo(priv);
863 if (err)
864 goto free_rx;
865
866 if (gve_is_gqi(priv))
867 add_napi_init_sync_stats(priv, gve_napi_poll);
868 else
869 add_napi_init_sync_stats(priv, gve_napi_poll_dqo);
870
871 return 0;
872
873 free_rx:
874 kvfree(priv->rx);
875 priv->rx = NULL;
876 free_tx_queue:
877 gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv));
878 free_tx:
879 kvfree(priv->tx);
880 priv->tx = NULL;
881 return err;
882 }
883
884 static int gve_destroy_xdp_rings(struct gve_priv *priv)
885 {
886 int start_id;
887 int err;
888
889 start_id = gve_xdp_tx_start_queue_id(priv);
890 err = gve_adminq_destroy_tx_queues(priv,
891 start_id,
892 priv->num_xdp_queues);
893 if (err) {
894 netif_err(priv, drv, priv->dev,
895 "failed to destroy XDP queues\n");
896 /* This failure will trigger a reset - no need to clean up */
897 return err;
898 }
899 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
900
901 return 0;
902 }
903
904 static int gve_destroy_rings(struct gve_priv *priv)
905 {
906 int num_tx_queues = gve_num_tx_queues(priv);
907 int err;
908
909 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
910 if (err) {
911 netif_err(priv, drv, priv->dev,
912 "failed to destroy tx queues\n");
913 /* This failure will trigger a reset - no need to clean up */
914 return err;
915 }
916 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
917 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
918 if (err) {
919 netif_err(priv, drv, priv->dev,
920 "failed to destroy rx queues\n");
921 /* This failure will trigger a reset - no need to clean up */
922 return err;
923 }
924 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
925 return 0;
926 }
927
928 static void gve_rx_free_rings(struct gve_priv *priv)
929 {
930 if (gve_is_gqi(priv))
931 gve_rx_free_rings_gqi(priv);
932 else
933 gve_rx_free_rings_dqo(priv);
934 }
935
936 static void gve_free_xdp_rings(struct gve_priv *priv)
937 {
938 int ntfy_idx, start_id;
939 int i;
940
941 start_id = gve_xdp_tx_start_queue_id(priv);
942 if (priv->tx) {
943 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
944 ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
945 gve_remove_napi(priv, ntfy_idx);
946 }
947 gve_tx_free_rings(priv, start_id, priv->num_xdp_queues);
948 }
949 }
950
951 static void gve_free_rings(struct gve_priv *priv)
952 {
953 int num_tx_queues = gve_num_tx_queues(priv);
954 int ntfy_idx;
955 int i;
956
957 if (priv->tx) {
958 for (i = 0; i < num_tx_queues; i++) {
959 ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
960 gve_remove_napi(priv, ntfy_idx);
961 }
962 gve_tx_free_rings(priv, 0, num_tx_queues);
963 kvfree(priv->tx);
964 priv->tx = NULL;
965 }
966 if (priv->rx) {
967 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
968 ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
969 gve_remove_napi(priv, ntfy_idx);
970 }
971 gve_rx_free_rings(priv);
972 kvfree(priv->rx);
973 priv->rx = NULL;
974 }
975 }
976
977 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
978 struct page **page, dma_addr_t *dma,
979 enum dma_data_direction dir, gfp_t gfp_flags)
980 {
981 *page = alloc_page(gfp_flags);
982 if (!*page) {
983 priv->page_alloc_fail++;
984 return -ENOMEM;
985 }
986 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
987 if (dma_mapping_error(dev, *dma)) {
988 priv->dma_mapping_error++;
989 put_page(*page);
990 return -ENOMEM;
991 }
992 return 0;
993 }
994
995 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
996 int pages)
997 {
998 struct gve_queue_page_list *qpl = &priv->qpls[id];
999 int err;
1000 int i;
1001
1002 if (pages + priv->num_registered_pages > priv->max_registered_pages) {
1003 netif_err(priv, drv, priv->dev,
1004 "Reached max number of registered pages %llu > %llu\n",
1005 pages + priv->num_registered_pages,
1006 priv->max_registered_pages);
1007 return -EINVAL;
1008 }
1009
1010 qpl->id = id;
1011 qpl->num_entries = 0;
1012 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
1013 /* caller handles clean up */
1014 if (!qpl->pages)
1015 return -ENOMEM;
1016 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
1017 /* caller handles clean up */
1018 if (!qpl->page_buses)
1019 return -ENOMEM;
1020
1021 for (i = 0; i < pages; i++) {
1022 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
1023 &qpl->page_buses[i],
1024 gve_qpl_dma_dir(priv, id), GFP_KERNEL);
1025 /* caller handles clean up */
1026 if (err)
1027 return -ENOMEM;
1028 qpl->num_entries++;
1029 }
1030 priv->num_registered_pages += pages;
1031
1032 return 0;
1033 }
1034
1035 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
1036 enum dma_data_direction dir)
1037 {
1038 if (!dma_mapping_error(dev, dma))
1039 dma_unmap_page(dev, dma, PAGE_SIZE, dir);
1040 if (page)
1041 put_page(page);
1042 }
1043
1044 static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
1045 {
1046 struct gve_queue_page_list *qpl = &priv->qpls[id];
1047 int i;
1048
1049 if (!qpl->pages)
1050 return;
1051 if (!qpl->page_buses)
1052 goto free_pages;
1053
1054 for (i = 0; i < qpl->num_entries; i++)
1055 gve_free_page(&priv->pdev->dev, qpl->pages[i],
1056 qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
1057
1058 kvfree(qpl->page_buses);
1059 qpl->page_buses = NULL;
1060 free_pages:
1061 kvfree(qpl->pages);
1062 qpl->pages = NULL;
1063 priv->num_registered_pages -= qpl->num_entries;
1064 }
1065
1066 static int gve_alloc_xdp_qpls(struct gve_priv *priv)
1067 {
1068 int start_id;
1069 int i, j;
1070 int err;
1071
1072 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
1073 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
1074 err = gve_alloc_queue_page_list(priv, i,
1075 priv->tx_pages_per_qpl);
1076 if (err)
1077 goto free_qpls;
1078 }
1079
1080 return 0;
1081
1082 free_qpls:
1083 for (j = start_id; j <= i; j++)
1084 gve_free_queue_page_list(priv, j);
1085 return err;
1086 }
1087
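/* Allocate queue page lists for every TX and RX queue in QPL mode, plus the
 * bitmap used to track which QPLs are in use.
 */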
1088 static int gve_alloc_qpls(struct gve_priv *priv)
1089 {
1090 int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
1091 int page_count;
1092 int start_id;
1093 int i, j;
1094 int err;
1095
1096 if (!gve_is_qpl(priv))
1097 return 0;
1098
1099 priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
1100 if (!priv->qpls)
1101 return -ENOMEM;
1102
1103 start_id = gve_tx_start_qpl_id(priv);
1104 page_count = priv->tx_pages_per_qpl;
1105 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
1106 err = gve_alloc_queue_page_list(priv, i,
1107 page_count);
1108 if (err)
1109 goto free_qpls;
1110 }
1111
1112 start_id = gve_rx_start_qpl_id(priv);
1113
1114 /* For GQI_QPL number of pages allocated have 1:1 relationship with
1115 * number of descriptors. For DQO, number of pages required are
1116 * more than descriptors (because of out of order completions).
1117 */
1118 page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
1119 priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
1120 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
1121 err = gve_alloc_queue_page_list(priv, i,
1122 page_count);
1123 if (err)
1124 goto free_qpls;
1125 }
1126
1127 priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) *
1128 sizeof(unsigned long) * BITS_PER_BYTE;
1129 priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
1130 sizeof(unsigned long), GFP_KERNEL);
1131 if (!priv->qpl_cfg.qpl_id_map) {
1132 err = -ENOMEM;
1133 goto free_qpls;
1134 }
1135
1136 return 0;
1137
1138 free_qpls:
1139 for (j = 0; j <= i; j++)
1140 gve_free_queue_page_list(priv, j);
1141 kvfree(priv->qpls);
1142 priv->qpls = NULL;
1143 return err;
1144 }
1145
1146 static void gve_free_xdp_qpls(struct gve_priv *priv)
1147 {
1148 int start_id;
1149 int i;
1150
1151 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
1152 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++)
1153 gve_free_queue_page_list(priv, i);
1154 }
1155
1156 static void gve_free_qpls(struct gve_priv *priv)
1157 {
1158 int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
1159 int i;
1160
1161 if (!priv->qpls)
1162 return;
1163
1164 kvfree(priv->qpl_cfg.qpl_id_map);
1165 priv->qpl_cfg.qpl_id_map = NULL;
1166
1167 for (i = 0; i < max_queues; i++)
1168 gve_free_queue_page_list(priv, i);
1169
1170 kvfree(priv->qpls);
1171 priv->qpls = NULL;
1172 }
1173
1174 /* Use this to schedule a reset when the device is capable of continuing
1175 * to handle other requests in its current state. If it is not, do a reset
1176 * in thread instead.
1177 */
1178 void gve_schedule_reset(struct gve_priv *priv)
1179 {
1180 gve_set_do_reset(priv);
1181 queue_work(priv->gve_wq, &priv->service_task);
1182 }
1183
1184 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
1185 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
1186 static void gve_turndown(struct gve_priv *priv);
1187 static void gve_turnup(struct gve_priv *priv);
1188
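/* Register XDP RX queue info (and XSK pools, when present) for every RX
 * queue and point the XDP TX queues at their XSK pools.
 */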
1189 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
1190 {
1191 struct napi_struct *napi;
1192 struct gve_rx_ring *rx;
1193 int err = 0;
1194 int i, j;
1195 u32 tx_qid;
1196
1197 if (!priv->num_xdp_queues)
1198 return 0;
1199
1200 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1201 rx = &priv->rx[i];
1202 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1203
1204 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
1205 napi->napi_id);
1206 if (err)
1207 goto err;
1208 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
1209 MEM_TYPE_PAGE_SHARED, NULL);
1210 if (err)
1211 goto err;
1212 rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
1213 if (rx->xsk_pool) {
1214 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
1215 napi->napi_id);
1216 if (err)
1217 goto err;
1218 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1219 MEM_TYPE_XSK_BUFF_POOL, NULL);
1220 if (err)
1221 goto err;
1222 xsk_pool_set_rxq_info(rx->xsk_pool,
1223 &rx->xsk_rxq);
1224 }
1225 }
1226
1227 for (i = 0; i < priv->num_xdp_queues; i++) {
1228 tx_qid = gve_xdp_tx_queue_id(priv, i);
1229 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
1230 }
1231 return 0;
1232
1233 err:
1234 for (j = i; j >= 0; j--) {
1235 rx = &priv->rx[j];
1236 if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
1237 xdp_rxq_info_unreg(&rx->xdp_rxq);
1238 if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1239 xdp_rxq_info_unreg(&rx->xsk_rxq);
1240 }
1241 return err;
1242 }
1243
1244 static void gve_unreg_xdp_info(struct gve_priv *priv)
1245 {
1246 int i, tx_qid;
1247
1248 if (!priv->num_xdp_queues)
1249 return;
1250
1251 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1252 struct gve_rx_ring *rx = &priv->rx[i];
1253
1254 xdp_rxq_info_unreg(&rx->xdp_rxq);
1255 if (rx->xsk_pool) {
1256 xdp_rxq_info_unreg(&rx->xsk_rxq);
1257 rx->xsk_pool = NULL;
1258 }
1259 }
1260
1261 for (i = 0; i < priv->num_xdp_queues; i++) {
1262 tx_qid = gve_xdp_tx_queue_id(priv, i);
1263 priv->tx[tx_qid].xsk_pool = NULL;
1264 }
1265 }
1266
1267 static void gve_drain_page_cache(struct gve_priv *priv)
1268 {
1269 struct page_frag_cache *nc;
1270 int i;
1271
1272 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1273 nc = &priv->rx[i].page_cache;
1274 if (nc->va) {
1275 __page_frag_cache_drain(virt_to_page(nc->va),
1276 nc->pagecnt_bias);
1277 nc->va = NULL;
1278 }
1279 }
1280 }
1281
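/* ndo_open: allocate QPLs and rings, register them with the device, create
 * the queues and turn the data path up. Failures after device registration
 * fall back to a reset.
 */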
1282 static int gve_open(struct net_device *dev)
1283 {
1284 struct gve_priv *priv = netdev_priv(dev);
1285 int err;
1286
1287 if (priv->xdp_prog)
1288 priv->num_xdp_queues = priv->rx_cfg.num_queues;
1289 else
1290 priv->num_xdp_queues = 0;
1291
1292 err = gve_alloc_qpls(priv);
1293 if (err)
1294 return err;
1295
1296 err = gve_alloc_rings(priv);
1297 if (err)
1298 goto free_qpls;
1299
1300 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
1301 if (err)
1302 goto free_rings;
1303 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
1304 if (err)
1305 goto free_rings;
1306
1307 err = gve_reg_xdp_info(priv, dev);
1308 if (err)
1309 goto free_rings;
1310
1311 err = gve_register_qpls(priv);
1312 if (err)
1313 goto reset;
1314
1315 if (!gve_is_gqi(priv)) {
1316 /* Hard code this for now. This may be tuned in the future for
1317 * performance.
1318 */
1319 priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
1320 }
1321 err = gve_create_rings(priv);
1322 if (err)
1323 goto reset;
1324
1325 gve_set_device_rings_ok(priv);
1326
1327 if (gve_get_report_stats(priv))
1328 mod_timer(&priv->stats_report_timer,
1329 round_jiffies(jiffies +
1330 msecs_to_jiffies(priv->stats_report_timer_period)));
1331
1332 gve_turnup(priv);
1333 queue_work(priv->gve_wq, &priv->service_task);
1334 priv->interface_up_cnt++;
1335 return 0;
1336
1337 free_rings:
1338 gve_free_rings(priv);
1339 free_qpls:
1340 gve_free_qpls(priv);
1341 return err;
1342
1343 reset:
1344 /* This must have been called from a reset due to the rtnl lock
1345 * so just return at this point.
1346 */
1347 if (gve_get_reset_in_progress(priv))
1348 return err;
1349 /* Otherwise reset before returning */
1350 gve_reset_and_teardown(priv, true);
1351 /* if this fails there is nothing we can do so just ignore the return */
1352 gve_reset_recovery(priv, false);
1353 /* return the original error */
1354 return err;
1355 }
1356
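/* ndo_stop: quiesce the data path, destroy and unregister the queues, then
 * free rings and QPLs. Admin queue failures fall back to a reset.
 */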
1357 static int gve_close(struct net_device *dev)
1358 {
1359 struct gve_priv *priv = netdev_priv(dev);
1360 int err;
1361
1362 netif_carrier_off(dev);
1363 if (gve_get_device_rings_ok(priv)) {
1364 gve_turndown(priv);
1365 gve_drain_page_cache(priv);
1366 err = gve_destroy_rings(priv);
1367 if (err)
1368 goto err;
1369 err = gve_unregister_qpls(priv);
1370 if (err)
1371 goto err;
1372 gve_clear_device_rings_ok(priv);
1373 }
1374 del_timer_sync(&priv->stats_report_timer);
1375
1376 gve_unreg_xdp_info(priv);
1377 gve_free_rings(priv);
1378 gve_free_qpls(priv);
1379 priv->interface_down_cnt++;
1380 return 0;
1381
1382 err:
1383 /* This must have been called from a reset due to the rtnl lock
1384 * so just return at this point.
1385 */
1386 if (gve_get_reset_in_progress(priv))
1387 return err;
1388 /* Otherwise reset before returning */
1389 gve_reset_and_teardown(priv, true);
1390 return gve_reset_recovery(priv, false);
1391 }
1392
1393 static int gve_remove_xdp_queues(struct gve_priv *priv)
1394 {
1395 int err;
1396
1397 err = gve_destroy_xdp_rings(priv);
1398 if (err)
1399 return err;
1400
1401 err = gve_unregister_xdp_qpls(priv);
1402 if (err)
1403 return err;
1404
1405 gve_unreg_xdp_info(priv);
1406 gve_free_xdp_rings(priv);
1407 gve_free_xdp_qpls(priv);
1408 priv->num_xdp_queues = 0;
1409 return 0;
1410 }
1411
1412 static int gve_add_xdp_queues(struct gve_priv *priv)
1413 {
1414 int err;
1415
1416 priv->num_xdp_queues = priv->tx_cfg.num_queues;
1417
1418 err = gve_alloc_xdp_qpls(priv);
1419 if (err)
1420 goto err;
1421
1422 err = gve_alloc_xdp_rings(priv);
1423 if (err)
1424 goto free_xdp_qpls;
1425
1426 err = gve_reg_xdp_info(priv, priv->dev);
1427 if (err)
1428 goto free_xdp_rings;
1429
1430 err = gve_register_xdp_qpls(priv);
1431 if (err)
1432 goto free_xdp_rings;
1433
1434 err = gve_create_xdp_rings(priv);
1435 if (err)
1436 goto free_xdp_rings;
1437
1438 return 0;
1439
1440 free_xdp_rings:
1441 gve_free_xdp_rings(priv);
1442 free_xdp_qpls:
1443 gve_free_xdp_qpls(priv);
1444 err:
1445 priv->num_xdp_queues = 0;
1446 return err;
1447 }
1448
1449 static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1450 {
1451 if (!gve_get_napi_enabled(priv))
1452 return;
1453
1454 if (link_status == netif_carrier_ok(priv->dev))
1455 return;
1456
1457 if (link_status) {
1458 netdev_info(priv->dev, "Device link is up.\n");
1459 netif_carrier_on(priv->dev);
1460 } else {
1461 netdev_info(priv->dev, "Device link is down.\n");
1462 netif_carrier_off(priv->dev);
1463 }
1464 }
1465
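/* Install or remove an XDP program. When the interface is up, the data path
 * is turned down and XDP TX queues are added or removed to match.
 */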
1466 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
1467 struct netlink_ext_ack *extack)
1468 {
1469 struct bpf_prog *old_prog;
1470 int err = 0;
1471 u32 status;
1472
1473 old_prog = READ_ONCE(priv->xdp_prog);
1474 if (!netif_carrier_ok(priv->dev)) {
1475 WRITE_ONCE(priv->xdp_prog, prog);
1476 if (old_prog)
1477 bpf_prog_put(old_prog);
1478 return 0;
1479 }
1480
1481 gve_turndown(priv);
1482 if (!old_prog && prog) {
1483 // Allocate XDP TX queues if an XDP program is
1484 // being installed
1485 err = gve_add_xdp_queues(priv);
1486 if (err)
1487 goto out;
1488 } else if (old_prog && !prog) {
1489 // Remove XDP TX queues if an XDP program is
1490 // being uninstalled
1491 err = gve_remove_xdp_queues(priv);
1492 if (err)
1493 goto out;
1494 }
1495 WRITE_ONCE(priv->xdp_prog, prog);
1496 if (old_prog)
1497 bpf_prog_put(old_prog);
1498
1499 out:
1500 gve_turnup(priv);
1501 status = ioread32be(&priv->reg_bar0->device_status);
1502 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1503 return err;
1504 }
1505
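/* Enable an AF_XDP buffer pool on RX queue @qid and its paired XDP TX queue
 * after DMA-mapping the pool.
 */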
1506 static int gve_xsk_pool_enable(struct net_device *dev,
1507 struct xsk_buff_pool *pool,
1508 u16 qid)
1509 {
1510 struct gve_priv *priv = netdev_priv(dev);
1511 struct napi_struct *napi;
1512 struct gve_rx_ring *rx;
1513 int tx_qid;
1514 int err;
1515
1516 if (qid >= priv->rx_cfg.num_queues) {
1517 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
1518 return -EINVAL;
1519 }
1520 if (xsk_pool_get_rx_frame_size(pool) <
1521 priv->dev->max_mtu + sizeof(struct ethhdr)) {
1522 dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
1523 return -EINVAL;
1524 }
1525
1526 err = xsk_pool_dma_map(pool, &priv->pdev->dev,
1527 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1528 if (err)
1529 return err;
1530
1531 /* If XDP prog is not installed or interface is down, return. */
1532 if (!priv->xdp_prog || !netif_running(dev))
1533 return 0;
1534
1535 rx = &priv->rx[qid];
1536 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1537 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
1538 if (err)
1539 goto err;
1540
1541 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1542 MEM_TYPE_XSK_BUFF_POOL, NULL);
1543 if (err)
1544 goto err;
1545
1546 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
1547 rx->xsk_pool = pool;
1548
1549 tx_qid = gve_xdp_tx_queue_id(priv, qid);
1550 priv->tx[tx_qid].xsk_pool = pool;
1551
1552 return 0;
1553 err:
1554 if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1555 xdp_rxq_info_unreg(&rx->xsk_rxq);
1556
1557 xsk_pool_dma_unmap(pool,
1558 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1559 return err;
1560 }
1561
1562 static int gve_xsk_pool_disable(struct net_device *dev,
1563 u16 qid)
1564 {
1565 struct gve_priv *priv = netdev_priv(dev);
1566 struct napi_struct *napi_rx;
1567 struct napi_struct *napi_tx;
1568 struct xsk_buff_pool *pool;
1569 int tx_qid;
1570
1571 pool = xsk_get_pool_from_qid(dev, qid);
1572 if (!pool)
1573 return -EINVAL;
1574 if (qid >= priv->rx_cfg.num_queues)
1575 return -EINVAL;
1576
1577 /* If XDP prog is not installed or interface is down, unmap DMA and
1578 * return.
1579 */
1580 if (!priv->xdp_prog || !netif_running(dev))
1581 goto done;
1582
1583 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
1584 napi_disable(napi_rx); /* make sure current rx poll is done */
1585
1586 tx_qid = gve_xdp_tx_queue_id(priv, qid);
1587 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
1588 napi_disable(napi_tx); /* make sure current tx poll is done */
1589
1590 priv->rx[qid].xsk_pool = NULL;
1591 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1592 priv->tx[tx_qid].xsk_pool = NULL;
1593 smp_mb(); /* Make sure it is visible to the workers on datapath */
1594
1595 napi_enable(napi_rx);
1596 if (gve_rx_work_pending(&priv->rx[qid]))
1597 napi_schedule(napi_rx);
1598
1599 napi_enable(napi_tx);
1600 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
1601 napi_schedule(napi_tx);
1602
1603 done:
1604 xsk_pool_dma_unmap(pool,
1605 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1606 return 0;
1607 }
1608
1609 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
1610 {
1611 struct gve_priv *priv = netdev_priv(dev);
1612 int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
1613
1614 if (!gve_get_napi_enabled(priv))
1615 return -ENETDOWN;
1616
1617 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
1618 return -EINVAL;
1619
1620 if (flags & XDP_WAKEUP_TX) {
1621 struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
1622 struct napi_struct *napi =
1623 &priv->ntfy_blocks[tx->ntfy_id].napi;
1624
1625 if (!napi_if_scheduled_mark_missed(napi)) {
1626 /* Call local_bh_enable to trigger SoftIRQ processing */
1627 local_bh_disable();
1628 napi_schedule(napi);
1629 local_bh_enable();
1630 }
1631
1632 tx->xdp_xsk_wakeup++;
1633 }
1634
1635 return 0;
1636 }
1637
1638 static int verify_xdp_configuration(struct net_device *dev)
1639 {
1640 struct gve_priv *priv = netdev_priv(dev);
1641
1642 if (dev->features & NETIF_F_LRO) {
1643 netdev_warn(dev, "XDP is not supported when LRO is on.\n");
1644 return -EOPNOTSUPP;
1645 }
1646
1647 if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
1648 netdev_warn(dev, "XDP is not supported in mode %d.\n",
1649 priv->queue_format);
1650 return -EOPNOTSUPP;
1651 }
1652
1653 if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) {
1654 netdev_warn(dev, "XDP is not supported for mtu %d.\n",
1655 dev->mtu);
1656 return -EOPNOTSUPP;
1657 }
1658
1659 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
1660 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
1661 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
1662 priv->rx_cfg.num_queues,
1663 priv->tx_cfg.num_queues,
1664 priv->tx_cfg.max_queues);
1665 return -EINVAL;
1666 }
1667 return 0;
1668 }
1669
1670 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1671 {
1672 struct gve_priv *priv = netdev_priv(dev);
1673 int err;
1674
1675 err = verify_xdp_configuration(dev);
1676 if (err)
1677 return err;
1678 switch (xdp->command) {
1679 case XDP_SETUP_PROG:
1680 return gve_set_xdp(priv, xdp->prog, xdp->extack);
1681 case XDP_SETUP_XSK_POOL:
1682 if (xdp->xsk.pool)
1683 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
1684 else
1685 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
1686 default:
1687 return -EINVAL;
1688 }
1689 }
1690
1691 int gve_adjust_queues(struct gve_priv *priv,
1692 struct gve_queue_config new_rx_config,
1693 struct gve_queue_config new_tx_config)
1694 {
1695 int err;
1696
1697 if (netif_carrier_ok(priv->dev)) {
1698 /* To make this process as simple as possible we teardown the
1699 * device, set the new configuration, and then bring the device
1700 * up again.
1701 */
1702 err = gve_close(priv->dev);
1703 /* we have already tried to reset in close,
1704 * just fail at this point
1705 */
1706 if (err)
1707 return err;
1708 priv->tx_cfg = new_tx_config;
1709 priv->rx_cfg = new_rx_config;
1710
1711 err = gve_open(priv->dev);
1712 if (err)
1713 goto err;
1714
1715 return 0;
1716 }
1717 /* Set the config for the next up. */
1718 priv->tx_cfg = new_tx_config;
1719 priv->rx_cfg = new_rx_config;
1720
1721 return 0;
1722 err:
1723 netif_err(priv, drv, priv->dev,
1724 "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
1725 gve_turndown(priv);
1726 return err;
1727 }
1728
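/* Quiesce the data path: disable NAPI on every notification block, stop the
 * TX queues and wait for in-flight processing to finish.
 */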
1729 static void gve_turndown(struct gve_priv *priv)
1730 {
1731 int idx;
1732
1733 if (netif_carrier_ok(priv->dev))
1734 netif_carrier_off(priv->dev);
1735
1736 if (!gve_get_napi_enabled(priv))
1737 return;
1738
1739 /* Disable napi to prevent more work from coming in */
1740 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1741 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1742 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1743
1744 napi_disable(&block->napi);
1745 }
1746 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1747 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1748 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1749
1750 napi_disable(&block->napi);
1751 }
1752
1753 /* Stop tx queues */
1754 netif_tx_disable(priv->dev);
1755
1756 xdp_features_clear_redirect_target(priv->dev);
1757
1758 gve_clear_napi_enabled(priv);
1759 gve_clear_report_stats(priv);
1760
1761 /* Make sure that all traffic is finished processing. */
1762 synchronize_net();
1763 }
1764
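/* Bring the data path back up: start the TX queues, enable NAPI and unmask
 * (or re-arm) the per-block interrupts.
 */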
1765 static void gve_turnup(struct gve_priv *priv)
1766 {
1767 int idx;
1768
1769 /* Start the tx queues */
1770 netif_tx_start_all_queues(priv->dev);
1771
1772 /* Enable napi and unmask interrupts for all queues */
1773 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1774 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1775 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1776
1777 napi_enable(&block->napi);
1778 if (gve_is_gqi(priv)) {
1779 iowrite32be(0, gve_irq_doorbell(priv, block));
1780 } else {
1781 gve_set_itr_coalesce_usecs_dqo(priv, block,
1782 priv->tx_coalesce_usecs);
1783 }
1784 }
1785 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1786 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1787 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1788
1789 napi_enable(&block->napi);
1790 if (gve_is_gqi(priv)) {
1791 iowrite32be(0, gve_irq_doorbell(priv, block));
1792 } else {
1793 gve_set_itr_coalesce_usecs_dqo(priv, block,
1794 priv->rx_coalesce_usecs);
1795 }
1796 }
1797
1798 if (priv->num_xdp_queues && gve_supports_xdp_xmit(priv))
1799 xdp_features_set_redirect_target(priv->dev, false);
1800
1801 gve_set_napi_enabled(priv);
1802 }
1803
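/* ndo_tx_timeout: if the NIC has completions the driver has not processed
 * yet, kick the queue by rescheduling NAPI; otherwise schedule a full reset.
 */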
1804 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
1805 {
1806 struct gve_notify_block *block;
1807 struct gve_tx_ring *tx = NULL;
1808 struct gve_priv *priv;
1809 u32 last_nic_done;
1810 u32 current_time;
1811 u32 ntfy_idx;
1812
1813 netdev_info(dev, "Timeout on tx queue, %d", txqueue);
1814 priv = netdev_priv(dev);
1815 if (txqueue > priv->tx_cfg.num_queues)
1816 goto reset;
1817
1818 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
1819 if (ntfy_idx >= priv->num_ntfy_blks)
1820 goto reset;
1821
1822 block = &priv->ntfy_blocks[ntfy_idx];
1823 tx = block->tx;
1824
1825 current_time = jiffies_to_msecs(jiffies);
1826 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
1827 goto reset;
1828
1829 /* Check to see if there are missed completions, which will allow us to
1830 * kick the queue.
1831 */
1832 last_nic_done = gve_tx_load_event_counter(priv, tx);
1833 if (last_nic_done - tx->done) {
1834 netdev_info(dev, "Kicking queue %d", txqueue);
1835 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
1836 napi_schedule(&block->napi);
1837 tx->last_kick_msec = current_time;
1838 goto out;
1839 } // Else reset.
1840
1841 reset:
1842 gve_schedule_reset(priv);
1843
1844 out:
1845 if (tx)
1846 tx->queue_timeout++;
1847 priv->tx_timeo_cnt++;
1848 }
1849
1850 static int gve_set_features(struct net_device *netdev,
1851 netdev_features_t features)
1852 {
1853 const netdev_features_t orig_features = netdev->features;
1854 struct gve_priv *priv = netdev_priv(netdev);
1855 int err;
1856
1857 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
1858 netdev->features ^= NETIF_F_LRO;
1859 if (netif_carrier_ok(netdev)) {
1860 /* To make this process as simple as possible we
1861 * teardown the device, set the new configuration,
1862 * and then bring the device up again.
1863 */
1864 err = gve_close(netdev);
1865 /* We have already tried to reset in close, just fail
1866 * at this point.
1867 */
1868 if (err)
1869 goto err;
1870
1871 err = gve_open(netdev);
1872 if (err)
1873 goto err;
1874 }
1875 }
1876
1877 return 0;
1878 err:
1879 /* Reverts the change on error. */
1880 netdev->features = orig_features;
1881 netif_err(priv, drv, netdev,
1882 "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
1883 return err;
1884 }
1885
1886 static const struct net_device_ops gve_netdev_ops = {
1887 .ndo_start_xmit = gve_start_xmit,
1888 .ndo_open = gve_open,
1889 .ndo_stop = gve_close,
1890 .ndo_get_stats64 = gve_get_stats,
1891 .ndo_tx_timeout = gve_tx_timeout,
1892 .ndo_set_features = gve_set_features,
1893 .ndo_bpf = gve_xdp,
1894 .ndo_xdp_xmit = gve_xdp_xmit,
1895 .ndo_xsk_wakeup = gve_xsk_wakeup,
1896 };
1897
1898 static void gve_handle_status(struct gve_priv *priv, u32 status)
1899 {
1900 if (GVE_DEVICE_STATUS_RESET_MASK & status) {
1901 dev_info(&priv->pdev->dev, "Device requested reset.\n");
1902 gve_set_do_reset(priv);
1903 }
1904 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
1905 priv->stats_report_trigger_cnt++;
1906 gve_set_do_report_stats(priv);
1907 }
1908 }
1909
1910 static void gve_handle_reset(struct gve_priv *priv)
1911 {
1912 /* A service task will be scheduled at the end of probe to catch any
1913 * resets that need to happen, and we don't want to reset until
1914 * probe is done.
1915 */
1916 if (gve_get_probe_in_progress(priv))
1917 return;
1918
1919 if (gve_get_do_reset(priv)) {
1920 rtnl_lock();
1921 gve_reset(priv, false);
1922 rtnl_unlock();
1923 }
1924 }
1925
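/* Fill the stats report shared with the device with per-queue TX/RX
 * counters. Does nothing unless stats reporting has been requested.
 */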
1926 void gve_handle_report_stats(struct gve_priv *priv)
1927 {
1928 struct stats *stats = priv->stats_report->stats;
1929 int idx, stats_idx = 0;
1930 unsigned int start = 0;
1931 u64 tx_bytes;
1932
1933 if (!gve_get_report_stats(priv))
1934 return;
1935
1936 be64_add_cpu(&priv->stats_report->written_count, 1);
1937 /* tx stats */
1938 if (priv->tx) {
1939 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1940 u32 last_completion = 0;
1941 u32 tx_frames = 0;
1942
1943 /* DQO doesn't currently support these metrics. */
1944 if (gve_is_gqi(priv)) {
1945 last_completion = priv->tx[idx].done;
1946 tx_frames = priv->tx[idx].req;
1947 }
1948
1949 do {
1950 start = u64_stats_fetch_begin(&priv->tx[idx].statss);
1951 tx_bytes = priv->tx[idx].bytes_done;
1952 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
1953 stats[stats_idx++] = (struct stats) {
1954 .stat_name = cpu_to_be32(TX_WAKE_CNT),
1955 .value = cpu_to_be64(priv->tx[idx].wake_queue),
1956 .queue_id = cpu_to_be32(idx),
1957 };
1958 stats[stats_idx++] = (struct stats) {
1959 .stat_name = cpu_to_be32(TX_STOP_CNT),
1960 .value = cpu_to_be64(priv->tx[idx].stop_queue),
1961 .queue_id = cpu_to_be32(idx),
1962 };
1963 stats[stats_idx++] = (struct stats) {
1964 .stat_name = cpu_to_be32(TX_FRAMES_SENT),
1965 .value = cpu_to_be64(tx_frames),
1966 .queue_id = cpu_to_be32(idx),
1967 };
1968 stats[stats_idx++] = (struct stats) {
1969 .stat_name = cpu_to_be32(TX_BYTES_SENT),
1970 .value = cpu_to_be64(tx_bytes),
1971 .queue_id = cpu_to_be32(idx),
1972 };
1973 stats[stats_idx++] = (struct stats) {
1974 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
1975 .value = cpu_to_be64(last_completion),
1976 .queue_id = cpu_to_be32(idx),
1977 };
1978 stats[stats_idx++] = (struct stats) {
1979 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
1980 .value = cpu_to_be64(priv->tx[idx].queue_timeout),
1981 .queue_id = cpu_to_be32(idx),
1982 };
1983 }
1984 }
1985 /* rx stats */
1986 if (priv->rx) {
1987 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1988 stats[stats_idx++] = (struct stats) {
1989 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
1990 .value = cpu_to_be64(priv->rx[idx].desc.seqno),
1991 .queue_id = cpu_to_be32(idx),
1992 };
1993 stats[stats_idx++] = (struct stats) {
1994 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
1995 .value = cpu_to_be64(priv->rx[0].fill_cnt),
1996 .queue_id = cpu_to_be32(idx),
1997 };
1998 }
1999 }
2000 }
2001
2002 /* Handle NIC status register changes, reset requests and report stats */
2003 static void gve_service_task(struct work_struct *work)
2004 {
2005 struct gve_priv *priv = container_of(work, struct gve_priv,
2006 service_task);
2007 u32 status = ioread32be(&priv->reg_bar0->device_status);
2008
2009 gve_handle_status(priv, status);
2010
2011 gve_handle_reset(priv);
2012 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
2013 }
2014
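/* Advertise XDP features to the stack. Only the GQI-QPL queue format
 * supports XDP in this driver; other formats advertise none.
 */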
2015 static void gve_set_netdev_xdp_features(struct gve_priv *priv)
2016 {
2017 xdp_features_t xdp_features;
2018
2019 if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
2020 xdp_features = NETDEV_XDP_ACT_BASIC;
2021 xdp_features |= NETDEV_XDP_ACT_REDIRECT;
2022 xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
2023 } else {
2024 xdp_features = 0;
2025 }
2026
2027 xdp_set_features_flag(priv->dev, xdp_features);
2028 }
2029
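/* Bring up driver/device state: allocate the admin queue, verify driver
 * compatibility, describe the device and size the MSI-X/queue configuration
 * (skipped on reset recovery, which reuses the existing configuration), then
 * set up device resources. The admin queue is freed again on failure.
 */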
2030 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
2031 {
2032 int num_ntfy;
2033 int err;
2034
2035 /* Set up the adminq */
2036 err = gve_adminq_alloc(&priv->pdev->dev, priv);
2037 if (err) {
2038 dev_err(&priv->pdev->dev,
2039 "Failed to alloc admin queue: err=%d\n", err);
2040 return err;
2041 }
2042
2043 err = gve_verify_driver_compatibility(priv);
2044 if (err) {
2045 dev_err(&priv->pdev->dev,
2046 "Could not verify driver compatibility: err=%d\n", err);
2047 goto err;
2048 }
2049
2050 if (skip_describe_device)
2051 goto setup_device;
2052
2053 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
2054 /* Get the initial information we need from the device */
2055 err = gve_adminq_describe_device(priv);
2056 if (err) {
2057 dev_err(&priv->pdev->dev,
2058 "Could not get device information: err=%d\n", err);
2059 goto err;
2060 }
2061 priv->dev->mtu = priv->dev->max_mtu;
2062 num_ntfy = pci_msix_vec_count(priv->pdev);
2063 if (num_ntfy <= 0) {
2064 dev_err(&priv->pdev->dev,
2065 "could not count MSI-x vectors: err=%d\n", num_ntfy);
2066 err = num_ntfy;
2067 goto err;
2068 } else if (num_ntfy < GVE_MIN_MSIX) {
2069 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
2070 GVE_MIN_MSIX, num_ntfy);
2071 err = -EINVAL;
2072 goto err;
2073 }
2074
2075 	/* Big TCP is only supported on DQ */
2076 if (!gve_is_gqi(priv))
2077 netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
2078
2079 priv->num_registered_pages = 0;
2080 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
2081 /* gvnic has one Notification Block per MSI-x vector, except for the
2082 * management vector
2083 */
2084 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
2085 priv->mgmt_msix_idx = priv->num_ntfy_blks;
2086
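	/* Give TX and RX each at most half of the notification blocks. */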
2087 priv->tx_cfg.max_queues =
2088 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
2089 priv->rx_cfg.max_queues =
2090 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
2091
2092 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
2093 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
2094 if (priv->default_num_queues > 0) {
2095 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
2096 priv->tx_cfg.num_queues);
2097 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
2098 priv->rx_cfg.num_queues);
2099 }
2100
2101 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
2102 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
2103 dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
2104 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
2105
2106 if (!gve_is_gqi(priv)) {
2107 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
2108 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
2109 }
2110
2111 setup_device:
2112 gve_set_netdev_xdp_features(priv);
2113 err = gve_setup_device_resources(priv);
2114 if (!err)
2115 return 0;
2116 err:
2117 gve_adminq_free(&priv->pdev->dev, priv);
2118 return err;
2119 }
2120
2121 static void gve_teardown_priv_resources(struct gve_priv *priv)
2122 {
2123 gve_teardown_device_resources(priv);
2124 gve_adminq_free(&priv->pdev->dev, priv);
2125 }
2126
2127 static void gve_trigger_reset(struct gve_priv *priv)
2128 {
2129 /* Reset the device by releasing the AQ */
2130 gve_adminq_release(priv);
2131 }
2132
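/* Force a reset through the admin queue, close the interface if it was up,
 * and release the private resources.
 */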
2133 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
2134 {
2135 gve_trigger_reset(priv);
2136 /* With the reset having already happened, close cannot fail */
2137 if (was_up)
2138 gve_close(priv->dev);
2139 gve_teardown_priv_resources(priv);
2140 }
2141
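/* Rebuild driver state after a reset (or resume) and re-open the interface
 * if it was previously up; on failure all queues are turned down.
 */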
2142 static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
2143 {
2144 int err;
2145
2146 err = gve_init_priv(priv, true);
2147 if (err)
2148 goto err;
2149 if (was_up) {
2150 err = gve_open(priv->dev);
2151 if (err)
2152 goto err;
2153 }
2154 return 0;
2155 err:
2156 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
2157 gve_turndown(priv);
2158 return err;
2159 }
2160
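/* Full device reset: tear everything down (gracefully if attempt_teardown is
 * set, otherwise by turning down and force-resetting), rebuild via
 * gve_reset_recovery(), and clear the interface/stats trigger counters.
 */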
2161 int gve_reset(struct gve_priv *priv, bool attempt_teardown)
2162 {
2163 bool was_up = netif_carrier_ok(priv->dev);
2164 int err;
2165
2166 dev_info(&priv->pdev->dev, "Performing reset\n");
2167 gve_clear_do_reset(priv);
2168 gve_set_reset_in_progress(priv);
2169 /* If we aren't attempting to teardown normally, just go turndown and
2170 * reset right away.
2171 */
2172 if (!attempt_teardown) {
2173 gve_turndown(priv);
2174 gve_reset_and_teardown(priv, was_up);
2175 } else {
2176 /* Otherwise attempt to close normally */
2177 if (was_up) {
2178 err = gve_close(priv->dev);
2179 /* If that fails reset as we did above */
2180 if (err)
2181 gve_reset_and_teardown(priv, was_up);
2182 }
2183 /* Clean up any remaining resources */
2184 gve_teardown_priv_resources(priv);
2185 }
2186
2187 /* Set it all back up */
2188 err = gve_reset_recovery(priv, was_up);
2189 gve_clear_reset_in_progress(priv);
2190 priv->reset_cnt++;
2191 priv->interface_up_cnt = 0;
2192 priv->interface_down_cnt = 0;
2193 priv->stats_report_trigger_cnt = 0;
2194 return err;
2195 }
2196
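/* Write the driver version (prefix + version string) to the device's
 * driver-version register one byte at a time, terminated with a newline.
 */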
2197 static void gve_write_version(u8 __iomem *driver_version_register)
2198 {
2199 const char *c = gve_version_prefix;
2200
2201 while (*c) {
2202 writeb(*c, driver_version_register);
2203 c++;
2204 }
2205
2206 c = gve_version_str;
2207 while (*c) {
2208 writeb(*c, driver_version_register);
2209 c++;
2210 }
2211 writeb('\n', driver_version_register);
2212 }
2213
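/* PCI probe: enable the device, map the register and doorbell BARs, allocate
 * the netdev and private state, initialize the device via gve_init_priv(),
 * and register the netdev. Error paths unwind in reverse order.
 */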
2214 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2215 {
2216 int max_tx_queues, max_rx_queues;
2217 struct net_device *dev;
2218 __be32 __iomem *db_bar;
2219 struct gve_registers __iomem *reg_bar;
2220 struct gve_priv *priv;
2221 int err;
2222
2223 err = pci_enable_device(pdev);
2224 if (err)
2225 return err;
2226
2227 err = pci_request_regions(pdev, gve_driver_name);
2228 if (err)
2229 goto abort_with_enabled;
2230
2231 pci_set_master(pdev);
2232
2233 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2234 if (err) {
2235 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
2236 goto abort_with_pci_region;
2237 }
2238
2239 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
2240 if (!reg_bar) {
2241 dev_err(&pdev->dev, "Failed to map pci bar!\n");
2242 err = -ENOMEM;
2243 goto abort_with_pci_region;
2244 }
2245
2246 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
2247 if (!db_bar) {
2248 dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
2249 err = -ENOMEM;
2250 goto abort_with_reg_bar;
2251 }
2252
2253 	gve_write_version(&reg_bar->driver_version);
2254 /* Get max queues to alloc etherdev */
2255 	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
2256 	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
2257 /* Alloc and setup the netdev and priv */
2258 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
2259 if (!dev) {
2260 dev_err(&pdev->dev, "could not allocate netdev\n");
2261 err = -ENOMEM;
2262 goto abort_with_db_bar;
2263 }
2264 SET_NETDEV_DEV(dev, &pdev->dev);
2265 pci_set_drvdata(pdev, dev);
2266 dev->ethtool_ops = &gve_ethtool_ops;
2267 dev->netdev_ops = &gve_netdev_ops;
2268
2269 /* Set default and supported features.
2270 *
2271 * Features might be set in other locations as well (such as
2272 * `gve_adminq_describe_device`).
2273 */
2274 dev->hw_features = NETIF_F_HIGHDMA;
2275 dev->hw_features |= NETIF_F_SG;
2276 dev->hw_features |= NETIF_F_HW_CSUM;
2277 dev->hw_features |= NETIF_F_TSO;
2278 dev->hw_features |= NETIF_F_TSO6;
2279 dev->hw_features |= NETIF_F_TSO_ECN;
2280 dev->hw_features |= NETIF_F_RXCSUM;
2281 dev->hw_features |= NETIF_F_RXHASH;
2282 dev->features = dev->hw_features;
2283 dev->watchdog_timeo = 5 * HZ;
2284 dev->min_mtu = ETH_MIN_MTU;
2285 netif_carrier_off(dev);
2286
2287 priv = netdev_priv(dev);
2288 priv->dev = dev;
2289 priv->pdev = pdev;
2290 priv->msg_enable = DEFAULT_MSG_LEVEL;
2291 priv->reg_bar0 = reg_bar;
2292 priv->db_bar2 = db_bar;
2293 priv->service_task_flags = 0x0;
2294 priv->state_flags = 0x0;
2295 priv->ethtool_flags = 0x0;
2296
2297 gve_set_probe_in_progress(priv);
2298 priv->gve_wq = alloc_ordered_workqueue("gve", 0);
2299 if (!priv->gve_wq) {
2300 dev_err(&pdev->dev, "Could not allocate workqueue");
2301 err = -ENOMEM;
2302 goto abort_with_netdev;
2303 }
2304 INIT_WORK(&priv->service_task, gve_service_task);
2305 INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
2306 priv->tx_cfg.max_queues = max_tx_queues;
2307 priv->rx_cfg.max_queues = max_rx_queues;
2308
2309 err = gve_init_priv(priv, false);
2310 if (err)
2311 goto abort_with_wq;
2312
2313 err = register_netdev(dev);
2314 if (err)
2315 goto abort_with_gve_init;
2316
2317 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
2318 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
2319 gve_clear_probe_in_progress(priv);
2320 queue_work(priv->gve_wq, &priv->service_task);
2321 return 0;
2322
2323 abort_with_gve_init:
2324 gve_teardown_priv_resources(priv);
2325
2326 abort_with_wq:
2327 destroy_workqueue(priv->gve_wq);
2328
2329 abort_with_netdev:
2330 free_netdev(dev);
2331
2332 abort_with_db_bar:
2333 pci_iounmap(pdev, db_bar);
2334
2335 abort_with_reg_bar:
2336 pci_iounmap(pdev, reg_bar);
2337
2338 abort_with_pci_region:
2339 pci_release_regions(pdev);
2340
2341 abort_with_enabled:
2342 pci_disable_device(pdev);
2343 return err;
2344 }
2345
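/* PCI remove: unwind everything gve_probe() set up, in reverse order. */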
2346 static void gve_remove(struct pci_dev *pdev)
2347 {
2348 struct net_device *netdev = pci_get_drvdata(pdev);
2349 struct gve_priv *priv = netdev_priv(netdev);
2350 __be32 __iomem *db_bar = priv->db_bar2;
2351 void __iomem *reg_bar = priv->reg_bar0;
2352
2353 unregister_netdev(netdev);
2354 gve_teardown_priv_resources(priv);
2355 destroy_workqueue(priv->gve_wq);
2356 free_netdev(netdev);
2357 pci_iounmap(pdev, db_bar);
2358 pci_iounmap(pdev, reg_bar);
2359 pci_release_regions(pdev);
2360 pci_disable_device(pdev);
2361 }
2362
2363 static void gve_shutdown(struct pci_dev *pdev)
2364 {
2365 struct net_device *netdev = pci_get_drvdata(pdev);
2366 struct gve_priv *priv = netdev_priv(netdev);
2367 bool was_up = netif_carrier_ok(priv->dev);
2368
2369 rtnl_lock();
2370 if (was_up && gve_close(priv->dev)) {
2371 /* If the dev was up, attempt to close, if close fails, reset */
2372 gve_reset_and_teardown(priv, was_up);
2373 } else {
2374 /* If the dev wasn't up or close worked, finish tearing down */
2375 gve_teardown_priv_resources(priv);
2376 }
2377 rtnl_unlock();
2378 }
2379
2380 #ifdef CONFIG_PM
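/* PCI power-management hooks: suspend tears the device down (closing it
 * first if it was up) and records whether it was up; resume rebuilds state
 * via gve_reset_recovery().
 */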
2381 static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
2382 {
2383 struct net_device *netdev = pci_get_drvdata(pdev);
2384 struct gve_priv *priv = netdev_priv(netdev);
2385 bool was_up = netif_carrier_ok(priv->dev);
2386
2387 priv->suspend_cnt++;
2388 rtnl_lock();
2389 if (was_up && gve_close(priv->dev)) {
2390 /* If the dev was up, attempt to close, if close fails, reset */
2391 gve_reset_and_teardown(priv, was_up);
2392 } else {
2393 /* If the dev wasn't up or close worked, finish tearing down */
2394 gve_teardown_priv_resources(priv);
2395 }
2396 priv->up_before_suspend = was_up;
2397 rtnl_unlock();
2398 return 0;
2399 }
2400
2401 static int gve_resume(struct pci_dev *pdev)
2402 {
2403 struct net_device *netdev = pci_get_drvdata(pdev);
2404 struct gve_priv *priv = netdev_priv(netdev);
2405 int err;
2406
2407 priv->resume_cnt++;
2408 rtnl_lock();
2409 err = gve_reset_recovery(priv, priv->up_before_suspend);
2410 rtnl_unlock();
2411 return err;
2412 }
2413 #endif /* CONFIG_PM */
2414
2415 static const struct pci_device_id gve_id_table[] = {
2416 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
2417 { }
2418 };
2419
2420 static struct pci_driver gve_driver = {
2421 .name = gve_driver_name,
2422 .id_table = gve_id_table,
2423 .probe = gve_probe,
2424 .remove = gve_remove,
2425 .shutdown = gve_shutdown,
2426 #ifdef CONFIG_PM
2427 .suspend = gve_suspend,
2428 .resume = gve_resume,
2429 #endif
2430 };
2431
2432 module_pci_driver(gve_driver);
2433
2434 MODULE_DEVICE_TABLE(pci, gve_id_table);
2435 MODULE_AUTHOR("Google, Inc.");
2436 MODULE_DESCRIPTION("Google Virtual NIC Driver");
2437 MODULE_LICENSE("Dual MIT/GPL");
2438 MODULE_VERSION(GVE_VERSION);
2439