1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3 *
4 * Copyright (C) 2015-2021 Google, Inc.
5 */
6
7 #include <linux/bpf.h>
8 #include <linux/cpumask.h>
9 #include <linux/etherdevice.h>
10 #include <linux/filter.h>
11 #include <linux/interrupt.h>
12 #include <linux/module.h>
13 #include <linux/pci.h>
14 #include <linux/sched.h>
15 #include <linux/timer.h>
16 #include <linux/workqueue.h>
17 #include <linux/utsname.h>
18 #include <linux/version.h>
19 #include <net/sch_generic.h>
20 #include <net/xdp_sock_drv.h>
21 #include "gve.h"
22 #include "gve_dqo.h"
23 #include "gve_adminq.h"
24 #include "gve_register.h"
25
/* Default RX copybreak value (presumably a length in bytes - confirm). */
#define GVE_DEFAULT_RX_COPYBREAK	(256)

/* Default message-level mask: driver and link messages. */
#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION		"1.0.0"
#define GVE_VERSION_PREFIX	"GVE-"

/* Minimum amount of time between queue kicks in msec (10 seconds) */
#define MIN_TX_TIMEOUT_GAP	(1000 * 10)

/* Driver name and version strings; the first two have external linkage. */
char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
38
gve_verify_driver_compatibility(struct gve_priv * priv)39 static int gve_verify_driver_compatibility(struct gve_priv *priv)
40 {
41 int err;
42 struct gve_driver_info *driver_info;
43 dma_addr_t driver_info_bus;
44
45 driver_info = dma_alloc_coherent(&priv->pdev->dev,
46 sizeof(struct gve_driver_info),
47 &driver_info_bus, GFP_KERNEL);
48 if (!driver_info)
49 return -ENOMEM;
50
51 *driver_info = (struct gve_driver_info) {
52 .os_type = 1, /* Linux */
53 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
54 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
55 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
56 .driver_capability_flags = {
57 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
58 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
59 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
60 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
61 },
62 };
63 strscpy(driver_info->os_version_str1, utsname()->release,
64 sizeof(driver_info->os_version_str1));
65 strscpy(driver_info->os_version_str2, utsname()->version,
66 sizeof(driver_info->os_version_str2));
67
68 err = gve_adminq_verify_driver_compatibility(priv,
69 sizeof(struct gve_driver_info),
70 driver_info_bus);
71
72 /* It's ok if the device doesn't support this */
73 if (err == -EOPNOTSUPP)
74 err = 0;
75
76 dma_free_coherent(&priv->pdev->dev,
77 sizeof(struct gve_driver_info),
78 driver_info, driver_info_bus);
79 return err;
80 }
81
gve_start_xmit(struct sk_buff * skb,struct net_device * dev)82 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
83 {
84 struct gve_priv *priv = netdev_priv(dev);
85
86 if (gve_is_gqi(priv))
87 return gve_tx(skb, dev);
88 else
89 return gve_tx_dqo(skb, dev);
90 }
91
gve_get_stats(struct net_device * dev,struct rtnl_link_stats64 * s)92 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
93 {
94 struct gve_priv *priv = netdev_priv(dev);
95 unsigned int start;
96 u64 packets, bytes;
97 int num_tx_queues;
98 int ring;
99
100 num_tx_queues = gve_num_tx_queues(priv);
101 if (priv->rx) {
102 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
103 do {
104 start =
105 u64_stats_fetch_begin(&priv->rx[ring].statss);
106 packets = priv->rx[ring].rpackets;
107 bytes = priv->rx[ring].rbytes;
108 } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
109 start));
110 s->rx_packets += packets;
111 s->rx_bytes += bytes;
112 }
113 }
114 if (priv->tx) {
115 for (ring = 0; ring < num_tx_queues; ring++) {
116 do {
117 start =
118 u64_stats_fetch_begin(&priv->tx[ring].statss);
119 packets = priv->tx[ring].pkt_done;
120 bytes = priv->tx[ring].bytes_done;
121 } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
122 start));
123 s->tx_packets += packets;
124 s->tx_bytes += bytes;
125 }
126 }
127 }
128
gve_alloc_counter_array(struct gve_priv * priv)129 static int gve_alloc_counter_array(struct gve_priv *priv)
130 {
131 priv->counter_array =
132 dma_alloc_coherent(&priv->pdev->dev,
133 priv->num_event_counters *
134 sizeof(*priv->counter_array),
135 &priv->counter_array_bus, GFP_KERNEL);
136 if (!priv->counter_array)
137 return -ENOMEM;
138
139 return 0;
140 }
141
gve_free_counter_array(struct gve_priv * priv)142 static void gve_free_counter_array(struct gve_priv *priv)
143 {
144 if (!priv->counter_array)
145 return;
146
147 dma_free_coherent(&priv->pdev->dev,
148 priv->num_event_counters *
149 sizeof(*priv->counter_array),
150 priv->counter_array, priv->counter_array_bus);
151 priv->counter_array = NULL;
152 }
153
154 /* NIC requests to report stats */
gve_stats_report_task(struct work_struct * work)155 static void gve_stats_report_task(struct work_struct *work)
156 {
157 struct gve_priv *priv = container_of(work, struct gve_priv,
158 stats_report_task);
159 if (gve_get_do_report_stats(priv)) {
160 gve_handle_report_stats(priv);
161 gve_clear_do_report_stats(priv);
162 }
163 }
164
gve_stats_report_schedule(struct gve_priv * priv)165 static void gve_stats_report_schedule(struct gve_priv *priv)
166 {
167 if (!gve_get_probe_in_progress(priv) &&
168 !gve_get_reset_in_progress(priv)) {
169 gve_set_do_report_stats(priv);
170 queue_work(priv->gve_wq, &priv->stats_report_task);
171 }
172 }
173
gve_stats_report_timer(struct timer_list * t)174 static void gve_stats_report_timer(struct timer_list *t)
175 {
176 struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
177
178 mod_timer(&priv->stats_report_timer,
179 round_jiffies(jiffies +
180 msecs_to_jiffies(priv->stats_report_timer_period)));
181 gve_stats_report_schedule(priv);
182 }
183
gve_alloc_stats_report(struct gve_priv * priv)184 static int gve_alloc_stats_report(struct gve_priv *priv)
185 {
186 int tx_stats_num, rx_stats_num;
187
188 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
189 gve_num_tx_queues(priv);
190 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
191 priv->rx_cfg.num_queues;
192 priv->stats_report_len = struct_size(priv->stats_report, stats,
193 size_add(tx_stats_num, rx_stats_num));
194 priv->stats_report =
195 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
196 &priv->stats_report_bus, GFP_KERNEL);
197 if (!priv->stats_report)
198 return -ENOMEM;
199 /* Set up timer for the report-stats task */
200 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
201 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
202 return 0;
203 }
204
gve_free_stats_report(struct gve_priv * priv)205 static void gve_free_stats_report(struct gve_priv *priv)
206 {
207 if (!priv->stats_report)
208 return;
209
210 del_timer_sync(&priv->stats_report_timer);
211 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
212 priv->stats_report, priv->stats_report_bus);
213 priv->stats_report = NULL;
214 }
215
gve_mgmnt_intr(int irq,void * arg)216 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
217 {
218 struct gve_priv *priv = arg;
219
220 queue_work(priv->gve_wq, &priv->service_task);
221 return IRQ_HANDLED;
222 }
223
gve_intr(int irq,void * arg)224 static irqreturn_t gve_intr(int irq, void *arg)
225 {
226 struct gve_notify_block *block = arg;
227 struct gve_priv *priv = block->priv;
228
229 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
230 napi_schedule_irqoff(&block->napi);
231 return IRQ_HANDLED;
232 }
233
gve_intr_dqo(int irq,void * arg)234 static irqreturn_t gve_intr_dqo(int irq, void *arg)
235 {
236 struct gve_notify_block *block = arg;
237
238 /* Interrupts are automatically masked */
239 napi_schedule_irqoff(&block->napi);
240 return IRQ_HANDLED;
241 }
242
gve_napi_poll(struct napi_struct * napi,int budget)243 static int gve_napi_poll(struct napi_struct *napi, int budget)
244 {
245 struct gve_notify_block *block;
246 __be32 __iomem *irq_doorbell;
247 bool reschedule = false;
248 struct gve_priv *priv;
249 int work_done = 0;
250
251 block = container_of(napi, struct gve_notify_block, napi);
252 priv = block->priv;
253
254 if (block->tx) {
255 if (block->tx->q_num < priv->tx_cfg.num_queues)
256 reschedule |= gve_tx_poll(block, budget);
257 else if (budget)
258 reschedule |= gve_xdp_poll(block, budget);
259 }
260
261 if (!budget)
262 return 0;
263
264 if (block->rx) {
265 work_done = gve_rx_poll(block, budget);
266 reschedule |= work_done == budget;
267 }
268
269 if (reschedule)
270 return budget;
271
272 /* Complete processing - don't unmask irq if busy polling is enabled */
273 if (likely(napi_complete_done(napi, work_done))) {
274 irq_doorbell = gve_irq_doorbell(priv, block);
275 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
276
277 /* Ensure IRQ ACK is visible before we check pending work.
278 * If queue had issued updates, it would be truly visible.
279 */
280 mb();
281
282 if (block->tx)
283 reschedule |= gve_tx_clean_pending(priv, block->tx);
284 if (block->rx)
285 reschedule |= gve_rx_work_pending(block->rx);
286
287 if (reschedule && napi_reschedule(napi))
288 iowrite32be(GVE_IRQ_MASK, irq_doorbell);
289 }
290 return work_done;
291 }
292
gve_napi_poll_dqo(struct napi_struct * napi,int budget)293 static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
294 {
295 struct gve_notify_block *block =
296 container_of(napi, struct gve_notify_block, napi);
297 struct gve_priv *priv = block->priv;
298 bool reschedule = false;
299 int work_done = 0;
300
301 if (block->tx)
302 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
303
304 if (!budget)
305 return 0;
306
307 if (block->rx) {
308 work_done = gve_rx_poll_dqo(block, budget);
309 reschedule |= work_done == budget;
310 }
311
312 if (reschedule)
313 return budget;
314
315 if (likely(napi_complete_done(napi, work_done))) {
316 /* Enable interrupts again.
317 *
318 * We don't need to repoll afterwards because HW supports the
319 * PCI MSI-X PBA feature.
320 *
321 * Another interrupt would be triggered if a new event came in
322 * since the last one.
323 */
324 gve_write_irq_doorbell_dqo(priv, block,
325 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
326 }
327
328 return work_done;
329 }
330
gve_alloc_notify_blocks(struct gve_priv * priv)331 static int gve_alloc_notify_blocks(struct gve_priv *priv)
332 {
333 int num_vecs_requested = priv->num_ntfy_blks + 1;
334 unsigned int active_cpus;
335 int vecs_enabled;
336 int i, j;
337 int err;
338
339 priv->msix_vectors = kvcalloc(num_vecs_requested,
340 sizeof(*priv->msix_vectors), GFP_KERNEL);
341 if (!priv->msix_vectors)
342 return -ENOMEM;
343 for (i = 0; i < num_vecs_requested; i++)
344 priv->msix_vectors[i].entry = i;
345 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
346 GVE_MIN_MSIX, num_vecs_requested);
347 if (vecs_enabled < 0) {
348 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
349 GVE_MIN_MSIX, vecs_enabled);
350 err = vecs_enabled;
351 goto abort_with_msix_vectors;
352 }
353 if (vecs_enabled != num_vecs_requested) {
354 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
355 int vecs_per_type = new_num_ntfy_blks / 2;
356 int vecs_left = new_num_ntfy_blks % 2;
357
358 priv->num_ntfy_blks = new_num_ntfy_blks;
359 priv->mgmt_msix_idx = priv->num_ntfy_blks;
360 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
361 vecs_per_type);
362 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
363 vecs_per_type + vecs_left);
364 dev_err(&priv->pdev->dev,
365 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
366 vecs_enabled, priv->tx_cfg.max_queues,
367 priv->rx_cfg.max_queues);
368 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
369 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
370 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
371 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
372 }
373 /* Half the notification blocks go to TX and half to RX */
374 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
375
376 /* Setup Management Vector - the last vector */
377 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
378 pci_name(priv->pdev));
379 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
380 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
381 if (err) {
382 dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
383 goto abort_with_msix_enabled;
384 }
385 priv->irq_db_indices =
386 dma_alloc_coherent(&priv->pdev->dev,
387 priv->num_ntfy_blks *
388 sizeof(*priv->irq_db_indices),
389 &priv->irq_db_indices_bus, GFP_KERNEL);
390 if (!priv->irq_db_indices) {
391 err = -ENOMEM;
392 goto abort_with_mgmt_vector;
393 }
394
395 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
396 sizeof(*priv->ntfy_blocks), GFP_KERNEL);
397 if (!priv->ntfy_blocks) {
398 err = -ENOMEM;
399 goto abort_with_irq_db_indices;
400 }
401
402 /* Setup the other blocks - the first n-1 vectors */
403 for (i = 0; i < priv->num_ntfy_blks; i++) {
404 struct gve_notify_block *block = &priv->ntfy_blocks[i];
405 int msix_idx = i;
406
407 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
408 i, pci_name(priv->pdev));
409 block->priv = priv;
410 err = request_irq(priv->msix_vectors[msix_idx].vector,
411 gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
412 0, block->name, block);
413 if (err) {
414 dev_err(&priv->pdev->dev,
415 "Failed to receive msix vector %d\n", i);
416 goto abort_with_some_ntfy_blocks;
417 }
418 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
419 get_cpu_mask(i % active_cpus));
420 block->irq_db_index = &priv->irq_db_indices[i].index;
421 }
422 return 0;
423 abort_with_some_ntfy_blocks:
424 for (j = 0; j < i; j++) {
425 struct gve_notify_block *block = &priv->ntfy_blocks[j];
426 int msix_idx = j;
427
428 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
429 NULL);
430 free_irq(priv->msix_vectors[msix_idx].vector, block);
431 }
432 kvfree(priv->ntfy_blocks);
433 priv->ntfy_blocks = NULL;
434 abort_with_irq_db_indices:
435 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
436 sizeof(*priv->irq_db_indices),
437 priv->irq_db_indices, priv->irq_db_indices_bus);
438 priv->irq_db_indices = NULL;
439 abort_with_mgmt_vector:
440 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
441 abort_with_msix_enabled:
442 pci_disable_msix(priv->pdev);
443 abort_with_msix_vectors:
444 kvfree(priv->msix_vectors);
445 priv->msix_vectors = NULL;
446 return err;
447 }
448
gve_free_notify_blocks(struct gve_priv * priv)449 static void gve_free_notify_blocks(struct gve_priv *priv)
450 {
451 int i;
452
453 if (!priv->msix_vectors)
454 return;
455
456 /* Free the irqs */
457 for (i = 0; i < priv->num_ntfy_blks; i++) {
458 struct gve_notify_block *block = &priv->ntfy_blocks[i];
459 int msix_idx = i;
460
461 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
462 NULL);
463 free_irq(priv->msix_vectors[msix_idx].vector, block);
464 }
465 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
466 kvfree(priv->ntfy_blocks);
467 priv->ntfy_blocks = NULL;
468 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
469 sizeof(*priv->irq_db_indices),
470 priv->irq_db_indices, priv->irq_db_indices_bus);
471 priv->irq_db_indices = NULL;
472 pci_disable_msix(priv->pdev);
473 kvfree(priv->msix_vectors);
474 priv->msix_vectors = NULL;
475 }
476
gve_setup_device_resources(struct gve_priv * priv)477 static int gve_setup_device_resources(struct gve_priv *priv)
478 {
479 int err;
480
481 err = gve_alloc_counter_array(priv);
482 if (err)
483 return err;
484 err = gve_alloc_notify_blocks(priv);
485 if (err)
486 goto abort_with_counter;
487 err = gve_alloc_stats_report(priv);
488 if (err)
489 goto abort_with_ntfy_blocks;
490 err = gve_adminq_configure_device_resources(priv,
491 priv->counter_array_bus,
492 priv->num_event_counters,
493 priv->irq_db_indices_bus,
494 priv->num_ntfy_blks);
495 if (unlikely(err)) {
496 dev_err(&priv->pdev->dev,
497 "could not setup device_resources: err=%d\n", err);
498 err = -ENXIO;
499 goto abort_with_stats_report;
500 }
501
502 if (!gve_is_gqi(priv)) {
503 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
504 GFP_KERNEL);
505 if (!priv->ptype_lut_dqo) {
506 err = -ENOMEM;
507 goto abort_with_stats_report;
508 }
509 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
510 if (err) {
511 dev_err(&priv->pdev->dev,
512 "Failed to get ptype map: err=%d\n", err);
513 goto abort_with_ptype_lut;
514 }
515 }
516
517 err = gve_adminq_report_stats(priv, priv->stats_report_len,
518 priv->stats_report_bus,
519 GVE_STATS_REPORT_TIMER_PERIOD);
520 if (err)
521 dev_err(&priv->pdev->dev,
522 "Failed to report stats: err=%d\n", err);
523 gve_set_device_resources_ok(priv);
524 return 0;
525
526 abort_with_ptype_lut:
527 kvfree(priv->ptype_lut_dqo);
528 priv->ptype_lut_dqo = NULL;
529 abort_with_stats_report:
530 gve_free_stats_report(priv);
531 abort_with_ntfy_blocks:
532 gve_free_notify_blocks(priv);
533 abort_with_counter:
534 gve_free_counter_array(priv);
535
536 return err;
537 }
538
539 static void gve_trigger_reset(struct gve_priv *priv);
540
gve_teardown_device_resources(struct gve_priv * priv)541 static void gve_teardown_device_resources(struct gve_priv *priv)
542 {
543 int err;
544
545 /* Tell device its resources are being freed */
546 if (gve_get_device_resources_ok(priv)) {
547 /* detach the stats report */
548 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
549 if (err) {
550 dev_err(&priv->pdev->dev,
551 "Failed to detach stats report: err=%d\n", err);
552 gve_trigger_reset(priv);
553 }
554 err = gve_adminq_deconfigure_device_resources(priv);
555 if (err) {
556 dev_err(&priv->pdev->dev,
557 "Could not deconfigure device resources: err=%d\n",
558 err);
559 gve_trigger_reset(priv);
560 }
561 }
562
563 kvfree(priv->ptype_lut_dqo);
564 priv->ptype_lut_dqo = NULL;
565
566 gve_free_counter_array(priv);
567 gve_free_notify_blocks(priv);
568 gve_free_stats_report(priv);
569 gve_clear_device_resources_ok(priv);
570 }
571
gve_add_napi(struct gve_priv * priv,int ntfy_idx,int (* gve_poll)(struct napi_struct *,int))572 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
573 int (*gve_poll)(struct napi_struct *, int))
574 {
575 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
576
577 netif_napi_add(priv->dev, &block->napi, gve_poll);
578 }
579
gve_remove_napi(struct gve_priv * priv,int ntfy_idx)580 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
581 {
582 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
583
584 netif_napi_del(&block->napi);
585 }
586
gve_register_xdp_qpls(struct gve_priv * priv)587 static int gve_register_xdp_qpls(struct gve_priv *priv)
588 {
589 int start_id;
590 int err;
591 int i;
592
593 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
594 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
595 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
596 if (err) {
597 netif_err(priv, drv, priv->dev,
598 "failed to register queue page list %d\n",
599 priv->qpls[i].id);
600 /* This failure will trigger a reset - no need to clean
601 * up
602 */
603 return err;
604 }
605 }
606 return 0;
607 }
608
gve_register_qpls(struct gve_priv * priv)609 static int gve_register_qpls(struct gve_priv *priv)
610 {
611 int start_id;
612 int err;
613 int i;
614
615 start_id = gve_tx_start_qpl_id(priv);
616 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
617 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
618 if (err) {
619 netif_err(priv, drv, priv->dev,
620 "failed to register queue page list %d\n",
621 priv->qpls[i].id);
622 /* This failure will trigger a reset - no need to clean
623 * up
624 */
625 return err;
626 }
627 }
628
629 start_id = gve_rx_start_qpl_id(priv);
630 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
631 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
632 if (err) {
633 netif_err(priv, drv, priv->dev,
634 "failed to register queue page list %d\n",
635 priv->qpls[i].id);
636 /* This failure will trigger a reset - no need to clean
637 * up
638 */
639 return err;
640 }
641 }
642 return 0;
643 }
644
gve_unregister_xdp_qpls(struct gve_priv * priv)645 static int gve_unregister_xdp_qpls(struct gve_priv *priv)
646 {
647 int start_id;
648 int err;
649 int i;
650
651 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
652 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
653 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
654 /* This failure will trigger a reset - no need to clean up */
655 if (err) {
656 netif_err(priv, drv, priv->dev,
657 "Failed to unregister queue page list %d\n",
658 priv->qpls[i].id);
659 return err;
660 }
661 }
662 return 0;
663 }
664
gve_unregister_qpls(struct gve_priv * priv)665 static int gve_unregister_qpls(struct gve_priv *priv)
666 {
667 int start_id;
668 int err;
669 int i;
670
671 start_id = gve_tx_start_qpl_id(priv);
672 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
673 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
674 /* This failure will trigger a reset - no need to clean up */
675 if (err) {
676 netif_err(priv, drv, priv->dev,
677 "Failed to unregister queue page list %d\n",
678 priv->qpls[i].id);
679 return err;
680 }
681 }
682
683 start_id = gve_rx_start_qpl_id(priv);
684 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
685 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
686 /* This failure will trigger a reset - no need to clean up */
687 if (err) {
688 netif_err(priv, drv, priv->dev,
689 "Failed to unregister queue page list %d\n",
690 priv->qpls[i].id);
691 return err;
692 }
693 }
694 return 0;
695 }
696
gve_create_xdp_rings(struct gve_priv * priv)697 static int gve_create_xdp_rings(struct gve_priv *priv)
698 {
699 int err;
700
701 err = gve_adminq_create_tx_queues(priv,
702 gve_xdp_tx_start_queue_id(priv),
703 priv->num_xdp_queues);
704 if (err) {
705 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
706 priv->num_xdp_queues);
707 /* This failure will trigger a reset - no need to clean
708 * up
709 */
710 return err;
711 }
712 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
713 priv->num_xdp_queues);
714
715 return 0;
716 }
717
gve_create_rings(struct gve_priv * priv)718 static int gve_create_rings(struct gve_priv *priv)
719 {
720 int num_tx_queues = gve_num_tx_queues(priv);
721 int err;
722 int i;
723
724 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
725 if (err) {
726 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
727 num_tx_queues);
728 /* This failure will trigger a reset - no need to clean
729 * up
730 */
731 return err;
732 }
733 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
734 num_tx_queues);
735
736 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
737 if (err) {
738 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
739 priv->rx_cfg.num_queues);
740 /* This failure will trigger a reset - no need to clean
741 * up
742 */
743 return err;
744 }
745 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
746 priv->rx_cfg.num_queues);
747
748 if (gve_is_gqi(priv)) {
749 /* Rx data ring has been prefilled with packet buffers at queue
750 * allocation time.
751 *
752 * Write the doorbell to provide descriptor slots and packet
753 * buffers to the NIC.
754 */
755 for (i = 0; i < priv->rx_cfg.num_queues; i++)
756 gve_rx_write_doorbell(priv, &priv->rx[i]);
757 } else {
758 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
759 /* Post buffers and ring doorbell. */
760 gve_rx_post_buffers_dqo(&priv->rx[i]);
761 }
762 }
763
764 return 0;
765 }
766
add_napi_init_xdp_sync_stats(struct gve_priv * priv,int (* napi_poll)(struct napi_struct * napi,int budget))767 static void add_napi_init_xdp_sync_stats(struct gve_priv *priv,
768 int (*napi_poll)(struct napi_struct *napi,
769 int budget))
770 {
771 int start_id = gve_xdp_tx_start_queue_id(priv);
772 int i;
773
774 /* Add xdp tx napi & init sync stats*/
775 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
776 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
777
778 u64_stats_init(&priv->tx[i].statss);
779 priv->tx[i].ntfy_id = ntfy_idx;
780 gve_add_napi(priv, ntfy_idx, napi_poll);
781 }
782 }
783
add_napi_init_sync_stats(struct gve_priv * priv,int (* napi_poll)(struct napi_struct * napi,int budget))784 static void add_napi_init_sync_stats(struct gve_priv *priv,
785 int (*napi_poll)(struct napi_struct *napi,
786 int budget))
787 {
788 int i;
789
790 /* Add tx napi & init sync stats*/
791 for (i = 0; i < gve_num_tx_queues(priv); i++) {
792 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
793
794 u64_stats_init(&priv->tx[i].statss);
795 priv->tx[i].ntfy_id = ntfy_idx;
796 gve_add_napi(priv, ntfy_idx, napi_poll);
797 }
798 /* Add rx napi & init sync stats*/
799 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
800 int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
801
802 u64_stats_init(&priv->rx[i].statss);
803 priv->rx[i].ntfy_id = ntfy_idx;
804 gve_add_napi(priv, ntfy_idx, napi_poll);
805 }
806 }
807
/* Free TX rings using the GQI or DQO routine.
 *
 * @start_id and @num_rings are only passed on in the GQI case; the DQO
 * routine is called without a range.
 */
static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings)
{
	if (!gve_is_gqi(priv)) {
		gve_tx_free_rings_dqo(priv);
		return;
	}

	gve_tx_free_rings_gqi(priv, start_id, num_rings);
}
816
gve_alloc_xdp_rings(struct gve_priv * priv)817 static int gve_alloc_xdp_rings(struct gve_priv *priv)
818 {
819 int start_id;
820 int err = 0;
821
822 if (!priv->num_xdp_queues)
823 return 0;
824
825 start_id = gve_xdp_tx_start_queue_id(priv);
826 err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues);
827 if (err)
828 return err;
829 add_napi_init_xdp_sync_stats(priv, gve_napi_poll);
830
831 return 0;
832 }
833
gve_alloc_rings(struct gve_priv * priv)834 static int gve_alloc_rings(struct gve_priv *priv)
835 {
836 int err;
837
838 /* Setup tx rings */
839 priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx),
840 GFP_KERNEL);
841 if (!priv->tx)
842 return -ENOMEM;
843
844 if (gve_is_gqi(priv))
845 err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv));
846 else
847 err = gve_tx_alloc_rings_dqo(priv);
848 if (err)
849 goto free_tx;
850
851 /* Setup rx rings */
852 priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx),
853 GFP_KERNEL);
854 if (!priv->rx) {
855 err = -ENOMEM;
856 goto free_tx_queue;
857 }
858
859 if (gve_is_gqi(priv))
860 err = gve_rx_alloc_rings(priv);
861 else
862 err = gve_rx_alloc_rings_dqo(priv);
863 if (err)
864 goto free_rx;
865
866 if (gve_is_gqi(priv))
867 add_napi_init_sync_stats(priv, gve_napi_poll);
868 else
869 add_napi_init_sync_stats(priv, gve_napi_poll_dqo);
870
871 return 0;
872
873 free_rx:
874 kvfree(priv->rx);
875 priv->rx = NULL;
876 free_tx_queue:
877 gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv));
878 free_tx:
879 kvfree(priv->tx);
880 priv->tx = NULL;
881 return err;
882 }
883
gve_destroy_xdp_rings(struct gve_priv * priv)884 static int gve_destroy_xdp_rings(struct gve_priv *priv)
885 {
886 int start_id;
887 int err;
888
889 start_id = gve_xdp_tx_start_queue_id(priv);
890 err = gve_adminq_destroy_tx_queues(priv,
891 start_id,
892 priv->num_xdp_queues);
893 if (err) {
894 netif_err(priv, drv, priv->dev,
895 "failed to destroy XDP queues\n");
896 /* This failure will trigger a reset - no need to clean up */
897 return err;
898 }
899 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
900
901 return 0;
902 }
903
gve_destroy_rings(struct gve_priv * priv)904 static int gve_destroy_rings(struct gve_priv *priv)
905 {
906 int num_tx_queues = gve_num_tx_queues(priv);
907 int err;
908
909 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
910 if (err) {
911 netif_err(priv, drv, priv->dev,
912 "failed to destroy tx queues\n");
913 /* This failure will trigger a reset - no need to clean up */
914 return err;
915 }
916 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
917 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
918 if (err) {
919 netif_err(priv, drv, priv->dev,
920 "failed to destroy rx queues\n");
921 /* This failure will trigger a reset - no need to clean up */
922 return err;
923 }
924 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
925 return 0;
926 }
927
/* Free the RX rings using the GQI or DQO routine. */
static void gve_rx_free_rings(struct gve_priv *priv)
{
	if (!gve_is_gqi(priv)) {
		gve_rx_free_rings_dqo(priv);
		return;
	}

	gve_rx_free_rings_gqi(priv);
}
935
gve_free_xdp_rings(struct gve_priv * priv)936 static void gve_free_xdp_rings(struct gve_priv *priv)
937 {
938 int ntfy_idx, start_id;
939 int i;
940
941 start_id = gve_xdp_tx_start_queue_id(priv);
942 if (priv->tx) {
943 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
944 ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
945 gve_remove_napi(priv, ntfy_idx);
946 }
947 gve_tx_free_rings(priv, start_id, priv->num_xdp_queues);
948 }
949 }
950
gve_free_rings(struct gve_priv * priv)951 static void gve_free_rings(struct gve_priv *priv)
952 {
953 int num_tx_queues = gve_num_tx_queues(priv);
954 int ntfy_idx;
955 int i;
956
957 if (priv->tx) {
958 for (i = 0; i < num_tx_queues; i++) {
959 ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
960 gve_remove_napi(priv, ntfy_idx);
961 }
962 gve_tx_free_rings(priv, 0, num_tx_queues);
963 kvfree(priv->tx);
964 priv->tx = NULL;
965 }
966 if (priv->rx) {
967 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
968 ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
969 gve_remove_napi(priv, ntfy_idx);
970 }
971 gve_rx_free_rings(priv);
972 kvfree(priv->rx);
973 priv->rx = NULL;
974 }
975 }
976
/* Allocate one page and DMA-map all of it.
 *
 * @page and @dma receive the page and its bus address.  The matching failure
 * counter in @priv is bumped when the page allocation or the mapping fails;
 * on a mapping failure the page reference is dropped again.
 *
 * Returns 0 on success, -ENOMEM on either failure.
 */
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
		   struct page **page, dma_addr_t *dma,
		   enum dma_data_direction dir, gfp_t gfp_flags)
{
	struct page *pg = alloc_page(gfp_flags);

	*page = pg;
	if (!pg) {
		priv->page_alloc_fail++;
		return -ENOMEM;
	}

	*dma = dma_map_page(dev, pg, 0, PAGE_SIZE, dir);
	if (!dma_mapping_error(dev, *dma))
		return 0;

	priv->dma_mapping_error++;
	put_page(pg);
	return -ENOMEM;
}
994
/* Allocate and DMA-map @pages pages for queue page list @id.
 *
 * The request is rejected with -EINVAL if it would push the number of
 * registered pages above priv->max_registered_pages.
 *
 * Nothing is released here on failure: the caller is expected to call
 * gve_free_queue_page_list(), which frees qpl->num_entries pages and
 * subtracts the same amount from priv->num_registered_pages.  To keep that
 * subtraction balanced, priv->num_registered_pages is increased per page as
 * each one is successfully allocated, rather than once at the end;
 * otherwise a failure part-way through would make the later cleanup
 * underflow the counter.
 *
 * Returns 0 on success, -EINVAL or -ENOMEM on failure.
 */
static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
				     int pages)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int err;
	int i;

	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
		netif_err(priv, drv, priv->dev,
			  "Reached max number of registered pages %llu > %llu\n",
			  pages + priv->num_registered_pages,
			  priv->max_registered_pages);
		return -EINVAL;
	}

	qpl->id = id;
	qpl->num_entries = 0;
	qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->pages)
		return -ENOMEM;
	qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->page_buses)
		return -ENOMEM;

	for (i = 0; i < pages; i++) {
		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
				     &qpl->page_buses[i],
				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
		/* caller handles clean up */
		if (err)
			return -ENOMEM;
		/* Account for each page as it is added so the count removed
		 * on cleanup always matches the count added here.
		 */
		qpl->num_entries++;
		priv->num_registered_pages++;
	}

	return 0;
}
1034
/* Undo gve_alloc_page(): unmap @dma unless it is a mapping-error value, then
 * drop the reference on @page if it is non-NULL.
 */
void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
		   enum dma_data_direction dir)
{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);

	if (!page)
		return;

	put_page(page);
}
1043
/* Free queue page list @id.
 *
 * Does nothing when the page array is not allocated.  When the bus address
 * array exists, the first qpl->num_entries pages are unmapped and released
 * before that array is freed.  Finally the page array is freed and
 * qpl->num_entries is subtracted from priv->num_registered_pages.
 */
static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int pg;

	if (!qpl->pages)
		return;

	if (qpl->page_buses) {
		for (pg = 0; pg < qpl->num_entries; pg++)
			gve_free_page(&priv->pdev->dev, qpl->pages[pg],
				      qpl->page_buses[pg],
				      gve_qpl_dma_dir(priv, id));

		kvfree(qpl->page_buses);
		qpl->page_buses = NULL;
	}

	kvfree(qpl->pages);
	qpl->pages = NULL;
	priv->num_registered_pages -= qpl->num_entries;
}
1065
gve_alloc_xdp_qpls(struct gve_priv * priv)1066 static int gve_alloc_xdp_qpls(struct gve_priv *priv)
1067 {
1068 int start_id;
1069 int i, j;
1070 int err;
1071
1072 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
1073 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
1074 err = gve_alloc_queue_page_list(priv, i,
1075 priv->tx_pages_per_qpl);
1076 if (err)
1077 goto free_qpls;
1078 }
1079
1080 return 0;
1081
1082 free_qpls:
1083 for (j = start_id; j <= i; j++)
1084 gve_free_queue_page_list(priv, j);
1085 return err;
1086 }
1087
gve_alloc_qpls(struct gve_priv * priv)1088 static int gve_alloc_qpls(struct gve_priv *priv)
1089 {
1090 int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
1091 int page_count;
1092 int start_id;
1093 int i, j;
1094 int err;
1095
1096 if (!gve_is_qpl(priv))
1097 return 0;
1098
1099 priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
1100 if (!priv->qpls)
1101 return -ENOMEM;
1102
1103 start_id = gve_tx_start_qpl_id(priv);
1104 page_count = priv->tx_pages_per_qpl;
1105 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
1106 err = gve_alloc_queue_page_list(priv, i,
1107 page_count);
1108 if (err)
1109 goto free_qpls;
1110 }
1111
1112 start_id = gve_rx_start_qpl_id(priv);
1113
1114 /* For GQI_QPL number of pages allocated have 1:1 relationship with
1115 * number of descriptors. For DQO, number of pages required are
1116 * more than descriptors (because of out of order completions).
1117 */
1118 page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
1119 priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
1120 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
1121 err = gve_alloc_queue_page_list(priv, i,
1122 page_count);
1123 if (err)
1124 goto free_qpls;
1125 }
1126
1127 priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) *
1128 sizeof(unsigned long) * BITS_PER_BYTE;
1129 priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
1130 sizeof(unsigned long), GFP_KERNEL);
1131 if (!priv->qpl_cfg.qpl_id_map) {
1132 err = -ENOMEM;
1133 goto free_qpls;
1134 }
1135
1136 return 0;
1137
1138 free_qpls:
1139 for (j = 0; j <= i; j++)
1140 gve_free_queue_page_list(priv, j);
1141 kvfree(priv->qpls);
1142 priv->qpls = NULL;
1143 return err;
1144 }
1145
/* Release the queue page lists that back the XDP TX queues. */
static void gve_free_xdp_qpls(struct gve_priv *priv)
{
	int first = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	int qpl_id = first;

	while (qpl_id < first + gve_num_xdp_qpls(priv)) {
		gve_free_queue_page_list(priv, qpl_id);
		qpl_id++;
	}
}
1155
gve_free_qpls(struct gve_priv * priv)1156 static void gve_free_qpls(struct gve_priv *priv)
1157 {
1158 int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
1159 int i;
1160
1161 if (!priv->qpls)
1162 return;
1163
1164 kvfree(priv->qpl_cfg.qpl_id_map);
1165 priv->qpl_cfg.qpl_id_map = NULL;
1166
1167 for (i = 0; i < max_queues; i++)
1168 gve_free_queue_page_list(priv, i);
1169
1170 kvfree(priv->qpls);
1171 priv->qpls = NULL;
1172 }
1173
1174 /* Use this to schedule a reset when the device is capable of continuing
1175 * to handle other requests in its current state. If it is not, do a reset
1176 * in thread instead.
1177 */
gve_schedule_reset(struct gve_priv * priv)1178 void gve_schedule_reset(struct gve_priv *priv)
1179 {
1180 gve_set_do_reset(priv);
1181 queue_work(priv->gve_wq, &priv->service_task);
1182 }
1183
1184 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
1185 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
1186 static void gve_turndown(struct gve_priv *priv);
1187 static void gve_turnup(struct gve_priv *priv);
1188
gve_reg_xdp_info(struct gve_priv * priv,struct net_device * dev)1189 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
1190 {
1191 struct napi_struct *napi;
1192 struct gve_rx_ring *rx;
1193 int err = 0;
1194 int i, j;
1195 u32 tx_qid;
1196
1197 if (!priv->num_xdp_queues)
1198 return 0;
1199
1200 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1201 rx = &priv->rx[i];
1202 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1203
1204 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
1205 napi->napi_id);
1206 if (err)
1207 goto err;
1208 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
1209 MEM_TYPE_PAGE_SHARED, NULL);
1210 if (err)
1211 goto err;
1212 rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
1213 if (rx->xsk_pool) {
1214 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
1215 napi->napi_id);
1216 if (err)
1217 goto err;
1218 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1219 MEM_TYPE_XSK_BUFF_POOL, NULL);
1220 if (err)
1221 goto err;
1222 xsk_pool_set_rxq_info(rx->xsk_pool,
1223 &rx->xsk_rxq);
1224 }
1225 }
1226
1227 for (i = 0; i < priv->num_xdp_queues; i++) {
1228 tx_qid = gve_xdp_tx_queue_id(priv, i);
1229 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
1230 }
1231 return 0;
1232
1233 err:
1234 for (j = i; j >= 0; j--) {
1235 rx = &priv->rx[j];
1236 if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
1237 xdp_rxq_info_unreg(&rx->xdp_rxq);
1238 if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1239 xdp_rxq_info_unreg(&rx->xsk_rxq);
1240 }
1241 return err;
1242 }
1243
gve_unreg_xdp_info(struct gve_priv * priv)1244 static void gve_unreg_xdp_info(struct gve_priv *priv)
1245 {
1246 int i, tx_qid;
1247
1248 if (!priv->num_xdp_queues)
1249 return;
1250
1251 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1252 struct gve_rx_ring *rx = &priv->rx[i];
1253
1254 xdp_rxq_info_unreg(&rx->xdp_rxq);
1255 if (rx->xsk_pool) {
1256 xdp_rxq_info_unreg(&rx->xsk_rxq);
1257 rx->xsk_pool = NULL;
1258 }
1259 }
1260
1261 for (i = 0; i < priv->num_xdp_queues; i++) {
1262 tx_qid = gve_xdp_tx_queue_id(priv, i);
1263 priv->tx[tx_qid].xsk_pool = NULL;
1264 }
1265 }
1266
gve_drain_page_cache(struct gve_priv * priv)1267 static void gve_drain_page_cache(struct gve_priv *priv)
1268 {
1269 struct page_frag_cache *nc;
1270 int i;
1271
1272 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1273 nc = &priv->rx[i].page_cache;
1274 if (nc->va) {
1275 __page_frag_cache_drain(virt_to_page(nc->va),
1276 nc->pagecnt_bias);
1277 nc->va = NULL;
1278 }
1279 }
1280 }
1281
gve_open(struct net_device * dev)1282 static int gve_open(struct net_device *dev)
1283 {
1284 struct gve_priv *priv = netdev_priv(dev);
1285 int err;
1286
1287 if (priv->xdp_prog)
1288 priv->num_xdp_queues = priv->rx_cfg.num_queues;
1289 else
1290 priv->num_xdp_queues = 0;
1291
1292 err = gve_alloc_qpls(priv);
1293 if (err)
1294 return err;
1295
1296 err = gve_alloc_rings(priv);
1297 if (err)
1298 goto free_qpls;
1299
1300 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
1301 if (err)
1302 goto free_rings;
1303 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
1304 if (err)
1305 goto free_rings;
1306
1307 err = gve_reg_xdp_info(priv, dev);
1308 if (err)
1309 goto free_rings;
1310
1311 err = gve_register_qpls(priv);
1312 if (err)
1313 goto reset;
1314
1315 if (!gve_is_gqi(priv)) {
1316 /* Hard code this for now. This may be tuned in the future for
1317 * performance.
1318 */
1319 priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
1320 }
1321 err = gve_create_rings(priv);
1322 if (err)
1323 goto reset;
1324
1325 gve_set_device_rings_ok(priv);
1326
1327 if (gve_get_report_stats(priv))
1328 mod_timer(&priv->stats_report_timer,
1329 round_jiffies(jiffies +
1330 msecs_to_jiffies(priv->stats_report_timer_period)));
1331
1332 gve_turnup(priv);
1333 queue_work(priv->gve_wq, &priv->service_task);
1334 priv->interface_up_cnt++;
1335 return 0;
1336
1337 free_rings:
1338 gve_free_rings(priv);
1339 free_qpls:
1340 gve_free_qpls(priv);
1341 return err;
1342
1343 reset:
1344 /* This must have been called from a reset due to the rtnl lock
1345 * so just return at this point.
1346 */
1347 if (gve_get_reset_in_progress(priv))
1348 return err;
1349 /* Otherwise reset before returning */
1350 gve_reset_and_teardown(priv, true);
1351 /* if this fails there is nothing we can do so just ignore the return */
1352 gve_reset_recovery(priv, false);
1353 /* return the original error */
1354 return err;
1355 }
1356
gve_close(struct net_device * dev)1357 static int gve_close(struct net_device *dev)
1358 {
1359 struct gve_priv *priv = netdev_priv(dev);
1360 int err;
1361
1362 netif_carrier_off(dev);
1363 if (gve_get_device_rings_ok(priv)) {
1364 gve_turndown(priv);
1365 gve_drain_page_cache(priv);
1366 err = gve_destroy_rings(priv);
1367 if (err)
1368 goto err;
1369 err = gve_unregister_qpls(priv);
1370 if (err)
1371 goto err;
1372 gve_clear_device_rings_ok(priv);
1373 }
1374 del_timer_sync(&priv->stats_report_timer);
1375
1376 gve_unreg_xdp_info(priv);
1377 gve_free_rings(priv);
1378 gve_free_qpls(priv);
1379 priv->interface_down_cnt++;
1380 return 0;
1381
1382 err:
1383 /* This must have been called from a reset due to the rtnl lock
1384 * so just return at this point.
1385 */
1386 if (gve_get_reset_in_progress(priv))
1387 return err;
1388 /* Otherwise reset before returning */
1389 gve_reset_and_teardown(priv, true);
1390 return gve_reset_recovery(priv, false);
1391 }
1392
gve_remove_xdp_queues(struct gve_priv * priv)1393 static int gve_remove_xdp_queues(struct gve_priv *priv)
1394 {
1395 int err;
1396
1397 err = gve_destroy_xdp_rings(priv);
1398 if (err)
1399 return err;
1400
1401 err = gve_unregister_xdp_qpls(priv);
1402 if (err)
1403 return err;
1404
1405 gve_unreg_xdp_info(priv);
1406 gve_free_xdp_rings(priv);
1407 gve_free_xdp_qpls(priv);
1408 priv->num_xdp_queues = 0;
1409 return 0;
1410 }
1411
gve_add_xdp_queues(struct gve_priv * priv)1412 static int gve_add_xdp_queues(struct gve_priv *priv)
1413 {
1414 int err;
1415
1416 priv->num_xdp_queues = priv->tx_cfg.num_queues;
1417
1418 err = gve_alloc_xdp_qpls(priv);
1419 if (err)
1420 goto err;
1421
1422 err = gve_alloc_xdp_rings(priv);
1423 if (err)
1424 goto free_xdp_qpls;
1425
1426 err = gve_reg_xdp_info(priv, priv->dev);
1427 if (err)
1428 goto free_xdp_rings;
1429
1430 err = gve_register_xdp_qpls(priv);
1431 if (err)
1432 goto free_xdp_rings;
1433
1434 err = gve_create_xdp_rings(priv);
1435 if (err)
1436 goto free_xdp_rings;
1437
1438 return 0;
1439
1440 free_xdp_rings:
1441 gve_free_xdp_rings(priv);
1442 free_xdp_qpls:
1443 gve_free_xdp_qpls(priv);
1444 err:
1445 priv->num_xdp_queues = 0;
1446 return err;
1447 }
1448
/* Sync the netdev carrier state with the link state reported by the device.
 * Ignored while NAPI is disabled or when the carrier already matches.
 */
static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
{
	if (!gve_get_napi_enabled(priv) ||
	    link_status == netif_carrier_ok(priv->dev))
		return;

	if (!link_status) {
		netdev_info(priv->dev, "Device link is down.\n");
		netif_carrier_off(priv->dev);
		return;
	}

	netdev_info(priv->dev, "Device link is up.\n");
	netif_carrier_on(priv->dev);
}
1465
gve_set_xdp(struct gve_priv * priv,struct bpf_prog * prog,struct netlink_ext_ack * extack)1466 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
1467 struct netlink_ext_ack *extack)
1468 {
1469 struct bpf_prog *old_prog;
1470 int err = 0;
1471 u32 status;
1472
1473 old_prog = READ_ONCE(priv->xdp_prog);
1474 if (!netif_carrier_ok(priv->dev)) {
1475 WRITE_ONCE(priv->xdp_prog, prog);
1476 if (old_prog)
1477 bpf_prog_put(old_prog);
1478 return 0;
1479 }
1480
1481 gve_turndown(priv);
1482 if (!old_prog && prog) {
1483 // Allocate XDP TX queues if an XDP program is
1484 // being installed
1485 err = gve_add_xdp_queues(priv);
1486 if (err)
1487 goto out;
1488 } else if (old_prog && !prog) {
1489 // Remove XDP TX queues if an XDP program is
1490 // being uninstalled
1491 err = gve_remove_xdp_queues(priv);
1492 if (err)
1493 goto out;
1494 }
1495 WRITE_ONCE(priv->xdp_prog, prog);
1496 if (old_prog)
1497 bpf_prog_put(old_prog);
1498
1499 out:
1500 gve_turnup(priv);
1501 status = ioread32be(&priv->reg_bar0->device_status);
1502 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1503 return err;
1504 }
1505
/* Bind an AF_XDP buffer pool to queue @qid.
 *
 * The pool is always DMA-mapped. The rxq registration and ring attachment
 * only happen when an XDP program is installed and the interface is running.
 *
 * Returns 0 on success, -EINVAL for an out-of-range queue or a pool whose
 * frames are too small, or the error from DMA mapping / rxq registration.
 */
static int gve_xsk_pool_enable(struct net_device *dev,
			       struct xsk_buff_pool *pool,
			       u16 qid)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct napi_struct *rx_napi;
	struct gve_rx_ring *ring;
	int xdp_txq;
	int rc;

	if (qid >= priv->rx_cfg.num_queues) {
		dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
		return -EINVAL;
	}
	if (xsk_pool_get_rx_frame_size(pool) <
	     priv->dev->max_mtu + sizeof(struct ethhdr)) {
		dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
		return -EINVAL;
	}

	rc = xsk_pool_dma_map(pool, &priv->pdev->dev,
			      DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	if (rc)
		return rc;

	/* If XDP prog is not installed or interface is down, return. */
	if (!priv->xdp_prog || !netif_running(dev))
		return 0;

	ring = &priv->rx[qid];
	rx_napi = &priv->ntfy_blocks[ring->ntfy_id].napi;

	rc = xdp_rxq_info_reg(&ring->xsk_rxq, dev, qid, rx_napi->napi_id);
	if (!rc)
		rc = xdp_rxq_info_reg_mem_model(&ring->xsk_rxq,
						MEM_TYPE_XSK_BUFF_POOL, NULL);
	if (rc) {
		/* Roll back whichever step got as far as registering */
		if (xdp_rxq_info_is_reg(&ring->xsk_rxq))
			xdp_rxq_info_unreg(&ring->xsk_rxq);

		xsk_pool_dma_unmap(pool,
				   DMA_ATTR_SKIP_CPU_SYNC |
				   DMA_ATTR_WEAK_ORDERING);
		return rc;
	}

	xsk_pool_set_rxq_info(pool, &ring->xsk_rxq);
	ring->xsk_pool = pool;

	xdp_txq = gve_xdp_tx_queue_id(priv, qid);
	priv->tx[xdp_txq].xsk_pool = pool;

	return 0;
}
1561
/* Unbind the AF_XDP buffer pool from queue @qid.
 *
 * When an XDP program is installed and the interface is running, the RX and
 * XDP TX NAPI contexts for the queue are disabled while the pool pointers
 * are cleared, then re-enabled and rescheduled if work is pending. The pool
 * is DMA-unmapped in every successful case.
 *
 * Returns 0 on success, -EINVAL if no pool is bound or @qid is out of range.
 */
static int gve_xsk_pool_disable(struct net_device *dev,
				u16 qid)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct xsk_buff_pool *pool = xsk_get_pool_from_qid(dev, qid);

	if (!pool)
		return -EINVAL;
	if (qid >= priv->rx_cfg.num_queues)
		return -EINVAL;

	/* If XDP prog is not installed or interface is down, only the DMA
	 * unmap below is needed.
	 */
	if (priv->xdp_prog && netif_running(dev)) {
		struct napi_struct *rx_napi, *tx_napi;
		int xdp_txq;

		rx_napi = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
		napi_disable(rx_napi); /* make sure current rx poll is done */

		xdp_txq = gve_xdp_tx_queue_id(priv, qid);
		tx_napi = &priv->ntfy_blocks[priv->tx[xdp_txq].ntfy_id].napi;
		napi_disable(tx_napi); /* make sure current tx poll is done */

		priv->rx[qid].xsk_pool = NULL;
		xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
		priv->tx[xdp_txq].xsk_pool = NULL;
		smp_mb(); /* Make sure it is visible to the workers on datapath */

		napi_enable(rx_napi);
		if (gve_rx_work_pending(&priv->rx[qid]))
			napi_schedule(rx_napi);

		napi_enable(tx_napi);
		if (gve_tx_clean_pending(priv, &priv->tx[xdp_txq]))
			napi_schedule(tx_napi);
	}

	xsk_pool_dma_unmap(pool,
			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	return 0;
}
1608
/* ndo_xsk_wakeup: kick the XDP TX NAPI context that serves @queue_id when a
 * TX wakeup is requested.
 *
 * Returns -ENETDOWN while NAPI is disabled, -EINVAL for an out-of-range
 * queue or when no XDP program is installed, and 0 otherwise.
 */
static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
	struct gve_priv *priv = netdev_priv(dev);
	int xdp_txq = gve_xdp_tx_queue_id(priv, queue_id);
	struct napi_struct *tx_napi;
	struct gve_tx_ring *ring;

	if (!gve_get_napi_enabled(priv))
		return -ENETDOWN;

	if (!priv->xdp_prog || queue_id >= priv->rx_cfg.num_queues)
		return -EINVAL;

	/* Only TX wakeups are acted upon */
	if (!(flags & XDP_WAKEUP_TX))
		return 0;

	ring = &priv->tx[xdp_txq];
	tx_napi = &priv->ntfy_blocks[ring->ntfy_id].napi;

	if (!napi_if_scheduled_mark_missed(tx_napi)) {
		/* Call local_bh_enable to trigger SoftIRQ processing */
		local_bh_disable();
		napi_schedule(tx_napi);
		local_bh_enable();
	}

	ring->xdp_xsk_wakeup++;
	return 0;
}
1637
verify_xdp_configuration(struct net_device * dev)1638 static int verify_xdp_configuration(struct net_device *dev)
1639 {
1640 struct gve_priv *priv = netdev_priv(dev);
1641
1642 if (dev->features & NETIF_F_LRO) {
1643 netdev_warn(dev, "XDP is not supported when LRO is on.\n");
1644 return -EOPNOTSUPP;
1645 }
1646
1647 if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
1648 netdev_warn(dev, "XDP is not supported in mode %d.\n",
1649 priv->queue_format);
1650 return -EOPNOTSUPP;
1651 }
1652
1653 if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) {
1654 netdev_warn(dev, "XDP is not supported for mtu %d.\n",
1655 dev->mtu);
1656 return -EOPNOTSUPP;
1657 }
1658
1659 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
1660 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
1661 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
1662 priv->rx_cfg.num_queues,
1663 priv->tx_cfg.num_queues,
1664 priv->tx_cfg.max_queues);
1665 return -EINVAL;
1666 }
1667 return 0;
1668 }
1669
gve_xdp(struct net_device * dev,struct netdev_bpf * xdp)1670 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1671 {
1672 struct gve_priv *priv = netdev_priv(dev);
1673 int err;
1674
1675 err = verify_xdp_configuration(dev);
1676 if (err)
1677 return err;
1678 switch (xdp->command) {
1679 case XDP_SETUP_PROG:
1680 return gve_set_xdp(priv, xdp->prog, xdp->extack);
1681 case XDP_SETUP_XSK_POOL:
1682 if (xdp->xsk.pool)
1683 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
1684 else
1685 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
1686 default:
1687 return -EINVAL;
1688 }
1689 }
1690
gve_adjust_queues(struct gve_priv * priv,struct gve_queue_config new_rx_config,struct gve_queue_config new_tx_config)1691 int gve_adjust_queues(struct gve_priv *priv,
1692 struct gve_queue_config new_rx_config,
1693 struct gve_queue_config new_tx_config)
1694 {
1695 int err;
1696
1697 if (netif_carrier_ok(priv->dev)) {
1698 /* To make this process as simple as possible we teardown the
1699 * device, set the new configuration, and then bring the device
1700 * up again.
1701 */
1702 err = gve_close(priv->dev);
1703 /* we have already tried to reset in close,
1704 * just fail at this point
1705 */
1706 if (err)
1707 return err;
1708 priv->tx_cfg = new_tx_config;
1709 priv->rx_cfg = new_rx_config;
1710
1711 err = gve_open(priv->dev);
1712 if (err)
1713 goto err;
1714
1715 return 0;
1716 }
1717 /* Set the config for the next up. */
1718 priv->tx_cfg = new_tx_config;
1719 priv->rx_cfg = new_rx_config;
1720
1721 return 0;
1722 err:
1723 netif_err(priv, drv, priv->dev,
1724 "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
1725 gve_turndown(priv);
1726 return err;
1727 }
1728
gve_turndown(struct gve_priv * priv)1729 static void gve_turndown(struct gve_priv *priv)
1730 {
1731 int idx;
1732
1733 if (netif_carrier_ok(priv->dev))
1734 netif_carrier_off(priv->dev);
1735
1736 if (!gve_get_napi_enabled(priv))
1737 return;
1738
1739 /* Disable napi to prevent more work from coming in */
1740 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1741 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1742 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1743
1744 napi_disable(&block->napi);
1745 }
1746 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1747 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1748 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1749
1750 napi_disable(&block->napi);
1751 }
1752
1753 /* Stop tx queues */
1754 netif_tx_disable(priv->dev);
1755
1756 gve_clear_napi_enabled(priv);
1757 gve_clear_report_stats(priv);
1758
1759 /* Make sure that all traffic is finished processing. */
1760 synchronize_net();
1761 }
1762
gve_turnup(struct gve_priv * priv)1763 static void gve_turnup(struct gve_priv *priv)
1764 {
1765 int idx;
1766
1767 /* Start the tx queues */
1768 netif_tx_start_all_queues(priv->dev);
1769
1770 /* Enable napi and unmask interrupts for all queues */
1771 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1772 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1773 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1774
1775 napi_enable(&block->napi);
1776 if (gve_is_gqi(priv)) {
1777 iowrite32be(0, gve_irq_doorbell(priv, block));
1778 } else {
1779 gve_set_itr_coalesce_usecs_dqo(priv, block,
1780 priv->tx_coalesce_usecs);
1781 }
1782 }
1783 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1784 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1785 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1786
1787 napi_enable(&block->napi);
1788 if (gve_is_gqi(priv)) {
1789 iowrite32be(0, gve_irq_doorbell(priv, block));
1790 } else {
1791 gve_set_itr_coalesce_usecs_dqo(priv, block,
1792 priv->rx_coalesce_usecs);
1793 }
1794 }
1795
1796 gve_set_napi_enabled(priv);
1797 }
1798
gve_tx_timeout(struct net_device * dev,unsigned int txqueue)1799 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
1800 {
1801 struct gve_notify_block *block;
1802 struct gve_tx_ring *tx = NULL;
1803 struct gve_priv *priv;
1804 u32 last_nic_done;
1805 u32 current_time;
1806 u32 ntfy_idx;
1807
1808 netdev_info(dev, "Timeout on tx queue, %d", txqueue);
1809 priv = netdev_priv(dev);
1810 if (txqueue > priv->tx_cfg.num_queues)
1811 goto reset;
1812
1813 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
1814 if (ntfy_idx >= priv->num_ntfy_blks)
1815 goto reset;
1816
1817 block = &priv->ntfy_blocks[ntfy_idx];
1818 tx = block->tx;
1819
1820 current_time = jiffies_to_msecs(jiffies);
1821 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
1822 goto reset;
1823
1824 /* Check to see if there are missed completions, which will allow us to
1825 * kick the queue.
1826 */
1827 last_nic_done = gve_tx_load_event_counter(priv, tx);
1828 if (last_nic_done - tx->done) {
1829 netdev_info(dev, "Kicking queue %d", txqueue);
1830 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
1831 napi_schedule(&block->napi);
1832 tx->last_kick_msec = current_time;
1833 goto out;
1834 } // Else reset.
1835
1836 reset:
1837 gve_schedule_reset(priv);
1838
1839 out:
1840 if (tx)
1841 tx->queue_timeout++;
1842 priv->tx_timeo_cnt++;
1843 }
1844
/* ndo_set_features: only a change of NETIF_F_LRO is acted upon. The flag is
 * toggled and, when the carrier is up, the device is closed and reopened so
 * the new setting takes effect. On failure the original feature set is
 * restored and the error returned.
 */
static int gve_set_features(struct net_device *netdev,
			    netdev_features_t features)
{
	const netdev_features_t saved = netdev->features;
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	/* Nothing to do unless the LRO bit differs */
	if (!((netdev->features ^ features) & NETIF_F_LRO))
		return 0;

	netdev->features ^= NETIF_F_LRO;
	if (!netif_carrier_ok(netdev))
		return 0;

	/* To make this process as simple as possible we
	 * teardown the device, set the new configuration,
	 * and then bring the device up again.
	 *
	 * Close has already tried to reset on failure, so just
	 * fail at that point.
	 */
	err = gve_close(netdev);
	if (!err)
		err = gve_open(netdev);
	if (!err)
		return 0;

	/* Reverts the change on error. */
	netdev->features = saved;
	netif_err(priv, drv, netdev,
		  "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
	return err;
}
1880
1881 static const struct net_device_ops gve_netdev_ops = {
1882 .ndo_start_xmit = gve_start_xmit,
1883 .ndo_open = gve_open,
1884 .ndo_stop = gve_close,
1885 .ndo_get_stats64 = gve_get_stats,
1886 .ndo_tx_timeout = gve_tx_timeout,
1887 .ndo_set_features = gve_set_features,
1888 .ndo_bpf = gve_xdp,
1889 .ndo_xdp_xmit = gve_xdp_xmit,
1890 .ndo_xsk_wakeup = gve_xsk_wakeup,
1891 };
1892
/* Translate device status register bits into driver work flags: a pending
 * reset request and/or a pending stats report request.
 */
static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	bool wants_reset = status & GVE_DEVICE_STATUS_RESET_MASK;
	bool wants_stats = status & GVE_DEVICE_STATUS_REPORT_STATS_MASK;

	if (wants_reset) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}

	if (wants_stats) {
		priv->stats_report_trigger_cnt++;
		gve_set_do_report_stats(priv);
	}
}
1904
gve_handle_reset(struct gve_priv * priv)1905 static void gve_handle_reset(struct gve_priv *priv)
1906 {
1907 /* A service task will be scheduled at the end of probe to catch any
1908 * resets that need to happen, and we don't want to reset until
1909 * probe is done.
1910 */
1911 if (gve_get_probe_in_progress(priv))
1912 return;
1913
1914 if (gve_get_do_reset(priv)) {
1915 rtnl_lock();
1916 gve_reset(priv, false);
1917 rtnl_unlock();
1918 }
1919 }
1920
gve_handle_report_stats(struct gve_priv * priv)1921 void gve_handle_report_stats(struct gve_priv *priv)
1922 {
1923 struct stats *stats = priv->stats_report->stats;
1924 int idx, stats_idx = 0;
1925 unsigned int start = 0;
1926 u64 tx_bytes;
1927
1928 if (!gve_get_report_stats(priv))
1929 return;
1930
1931 be64_add_cpu(&priv->stats_report->written_count, 1);
1932 /* tx stats */
1933 if (priv->tx) {
1934 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1935 u32 last_completion = 0;
1936 u32 tx_frames = 0;
1937
1938 /* DQO doesn't currently support these metrics. */
1939 if (gve_is_gqi(priv)) {
1940 last_completion = priv->tx[idx].done;
1941 tx_frames = priv->tx[idx].req;
1942 }
1943
1944 do {
1945 start = u64_stats_fetch_begin(&priv->tx[idx].statss);
1946 tx_bytes = priv->tx[idx].bytes_done;
1947 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
1948 stats[stats_idx++] = (struct stats) {
1949 .stat_name = cpu_to_be32(TX_WAKE_CNT),
1950 .value = cpu_to_be64(priv->tx[idx].wake_queue),
1951 .queue_id = cpu_to_be32(idx),
1952 };
1953 stats[stats_idx++] = (struct stats) {
1954 .stat_name = cpu_to_be32(TX_STOP_CNT),
1955 .value = cpu_to_be64(priv->tx[idx].stop_queue),
1956 .queue_id = cpu_to_be32(idx),
1957 };
1958 stats[stats_idx++] = (struct stats) {
1959 .stat_name = cpu_to_be32(TX_FRAMES_SENT),
1960 .value = cpu_to_be64(tx_frames),
1961 .queue_id = cpu_to_be32(idx),
1962 };
1963 stats[stats_idx++] = (struct stats) {
1964 .stat_name = cpu_to_be32(TX_BYTES_SENT),
1965 .value = cpu_to_be64(tx_bytes),
1966 .queue_id = cpu_to_be32(idx),
1967 };
1968 stats[stats_idx++] = (struct stats) {
1969 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
1970 .value = cpu_to_be64(last_completion),
1971 .queue_id = cpu_to_be32(idx),
1972 };
1973 stats[stats_idx++] = (struct stats) {
1974 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
1975 .value = cpu_to_be64(priv->tx[idx].queue_timeout),
1976 .queue_id = cpu_to_be32(idx),
1977 };
1978 }
1979 }
1980 /* rx stats */
1981 if (priv->rx) {
1982 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1983 stats[stats_idx++] = (struct stats) {
1984 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
1985 .value = cpu_to_be64(priv->rx[idx].desc.seqno),
1986 .queue_id = cpu_to_be32(idx),
1987 };
1988 stats[stats_idx++] = (struct stats) {
1989 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
1990 .value = cpu_to_be64(priv->rx[0].fill_cnt),
1991 .queue_id = cpu_to_be32(idx),
1992 };
1993 }
1994 }
1995 }
1996
1997 /* Handle NIC status register changes, reset requests and report stats */
gve_service_task(struct work_struct * work)1998 static void gve_service_task(struct work_struct *work)
1999 {
2000 struct gve_priv *priv = container_of(work, struct gve_priv,
2001 service_task);
2002 u32 status = ioread32be(&priv->reg_bar0->device_status);
2003
2004 gve_handle_status(priv, status);
2005
2006 gve_handle_reset(priv);
2007 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
2008 }
2009
gve_set_netdev_xdp_features(struct gve_priv * priv)2010 static void gve_set_netdev_xdp_features(struct gve_priv *priv)
2011 {
2012 xdp_features_t xdp_features;
2013
2014 if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
2015 xdp_features = NETDEV_XDP_ACT_BASIC;
2016 xdp_features |= NETDEV_XDP_ACT_REDIRECT;
2017 xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
2018 xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
2019 } else {
2020 xdp_features = 0;
2021 }
2022
2023 xdp_set_features_flag(priv->dev, xdp_features);
2024 }
2025
/* Initialise the private driver state and the device-side resources.
 *
 * Allocates the admin queue and verifies driver compatibility. Unless
 * @skip_describe_device is set, it also queries the device description and
 * derives the notification block count and queue limits from the MSI-X
 * vector count. Finally the XDP feature flags and device resources are set
 * up.
 *
 * Returns 0 on success or a negative errno; on any failure after the admin
 * queue was allocated, the admin queue is freed again.
 */
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	err = gve_verify_driver_compatibility(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not verify driver compatibility: err=%d\n", err);
		goto err;
	}

	if (skip_describe_device)
		goto setup_device;

	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		/* A count of 0 must not be reported as success, since the
		 * admin queue is freed on this path.
		 */
		err = num_ntfy ? num_ntfy : -EINVAL;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	/* Big TCP is only supported on DQO */
	if (!gve_is_gqi(priv))
		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);

	priv->num_registered_pages = 0;
	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	/* Half of the notification blocks go to TX and half to RX */
	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

	if (!gve_is_gqi(priv)) {
		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
	}

setup_device:
	gve_set_netdev_xdp_features(priv);
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}
2116
gve_teardown_priv_resources(struct gve_priv * priv)2117 static void gve_teardown_priv_resources(struct gve_priv *priv)
2118 {
2119 gve_teardown_device_resources(priv);
2120 gve_adminq_free(&priv->pdev->dev, priv);
2121 }
2122
/* Reset the device. Releasing the admin queue is what triggers the reset. */
static void gve_trigger_reset(struct gve_priv *priv)
{
	gve_adminq_release(priv);
}
2128
/* Reset the device first, then close the interface (only if @was_up) and
 * release the private resources.
 */
static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	struct net_device *netdev = priv->dev;

	gve_trigger_reset(priv);

	/* The reset has already happened, so close cannot fail here and its
	 * return value is deliberately not checked.
	 */
	if (was_up)
		gve_close(netdev);

	gve_teardown_priv_resources(priv);
}
2137
/* Bring the device back after a reset (also used on resume).
 *
 * Re-runs gve_init_priv() and, when @was_up is set, reopens the interface.
 * If either step fails the failure is logged and the queues are turned
 * down.
 *
 * Returns 0 on success, otherwise the error from the step that failed.
 */
static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int ret = gve_init_priv(priv, true);

	/* Only reopen when init succeeded and the interface was up before. */
	if (!ret && was_up)
		ret = gve_open(priv->dev);
	if (!ret)
		return 0;

	dev_err(&priv->pdev->dev,
		"Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return ret;
}
2156
/* Reset the device and set everything back up.
 *
 * @attempt_teardown: when true, try a normal close first and fall back to
 *	a forced reset only if that close fails; when false, turn the queues
 *	down and reset right away.
 *
 * The reset counter is bumped and the interface up/down and stats-report
 * trigger counters are zeroed whether or not recovery succeeds.
 *
 * Returns the result of gve_reset_recovery().
 */
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_carrier_ok(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);

	if (attempt_teardown) {
		/* Attempt to close normally; if that fails, reset the same
		 * way the forced path below does.
		 */
		if (was_up && gve_close(priv->dev))
			gve_reset_and_teardown(priv, was_up);

		/* Clean up any remaining resources.
		 * NOTE(review): after the fallback above this is the second
		 * gve_teardown_priv_resources() call in a row, so it relies
		 * on that function being safe to repeat - confirm.
		 */
		gve_teardown_priv_resources(priv);
	} else {
		/* No normal teardown requested: turn down and reset now. */
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);

	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;

	return err;
}
2192
/* Report the driver version to the device.
 *
 * Writes the version prefix, then the version string, then a '\n', one
 * byte at a time.  Every byte goes to the same register address; the
 * pointer is never advanced.
 * NOTE(review): presumably the device consumes these writes as a byte
 * stream - confirm against the device specification.
 */
static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *p;

	for (p = gve_version_prefix; *p; p++)
		writeb(*p, driver_version_register);

	for (p = gve_version_str; *p; p++)
		writeb(*p, driver_version_register);

	writeb('\n', driver_version_register);
}
2209
gve_probe(struct pci_dev * pdev,const struct pci_device_id * ent)2210 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2211 {
2212 int max_tx_queues, max_rx_queues;
2213 struct net_device *dev;
2214 __be32 __iomem *db_bar;
2215 struct gve_registers __iomem *reg_bar;
2216 struct gve_priv *priv;
2217 int err;
2218
2219 err = pci_enable_device(pdev);
2220 if (err)
2221 return err;
2222
2223 err = pci_request_regions(pdev, gve_driver_name);
2224 if (err)
2225 goto abort_with_enabled;
2226
2227 pci_set_master(pdev);
2228
2229 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2230 if (err) {
2231 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
2232 goto abort_with_pci_region;
2233 }
2234
2235 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
2236 if (!reg_bar) {
2237 dev_err(&pdev->dev, "Failed to map pci bar!\n");
2238 err = -ENOMEM;
2239 goto abort_with_pci_region;
2240 }
2241
2242 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
2243 if (!db_bar) {
2244 dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
2245 err = -ENOMEM;
2246 goto abort_with_reg_bar;
2247 }
2248
2249 gve_write_version(®_bar->driver_version);
2250 /* Get max queues to alloc etherdev */
2251 max_tx_queues = ioread32be(®_bar->max_tx_queues);
2252 max_rx_queues = ioread32be(®_bar->max_rx_queues);
2253 /* Alloc and setup the netdev and priv */
2254 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
2255 if (!dev) {
2256 dev_err(&pdev->dev, "could not allocate netdev\n");
2257 err = -ENOMEM;
2258 goto abort_with_db_bar;
2259 }
2260 SET_NETDEV_DEV(dev, &pdev->dev);
2261 pci_set_drvdata(pdev, dev);
2262 dev->ethtool_ops = &gve_ethtool_ops;
2263 dev->netdev_ops = &gve_netdev_ops;
2264
2265 /* Set default and supported features.
2266 *
2267 * Features might be set in other locations as well (such as
2268 * `gve_adminq_describe_device`).
2269 */
2270 dev->hw_features = NETIF_F_HIGHDMA;
2271 dev->hw_features |= NETIF_F_SG;
2272 dev->hw_features |= NETIF_F_HW_CSUM;
2273 dev->hw_features |= NETIF_F_TSO;
2274 dev->hw_features |= NETIF_F_TSO6;
2275 dev->hw_features |= NETIF_F_TSO_ECN;
2276 dev->hw_features |= NETIF_F_RXCSUM;
2277 dev->hw_features |= NETIF_F_RXHASH;
2278 dev->features = dev->hw_features;
2279 dev->watchdog_timeo = 5 * HZ;
2280 dev->min_mtu = ETH_MIN_MTU;
2281 netif_carrier_off(dev);
2282
2283 priv = netdev_priv(dev);
2284 priv->dev = dev;
2285 priv->pdev = pdev;
2286 priv->msg_enable = DEFAULT_MSG_LEVEL;
2287 priv->reg_bar0 = reg_bar;
2288 priv->db_bar2 = db_bar;
2289 priv->service_task_flags = 0x0;
2290 priv->state_flags = 0x0;
2291 priv->ethtool_flags = 0x0;
2292
2293 gve_set_probe_in_progress(priv);
2294 priv->gve_wq = alloc_ordered_workqueue("gve", 0);
2295 if (!priv->gve_wq) {
2296 dev_err(&pdev->dev, "Could not allocate workqueue");
2297 err = -ENOMEM;
2298 goto abort_with_netdev;
2299 }
2300 INIT_WORK(&priv->service_task, gve_service_task);
2301 INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
2302 priv->tx_cfg.max_queues = max_tx_queues;
2303 priv->rx_cfg.max_queues = max_rx_queues;
2304
2305 err = gve_init_priv(priv, false);
2306 if (err)
2307 goto abort_with_wq;
2308
2309 err = register_netdev(dev);
2310 if (err)
2311 goto abort_with_gve_init;
2312
2313 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
2314 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
2315 gve_clear_probe_in_progress(priv);
2316 queue_work(priv->gve_wq, &priv->service_task);
2317 return 0;
2318
2319 abort_with_gve_init:
2320 gve_teardown_priv_resources(priv);
2321
2322 abort_with_wq:
2323 destroy_workqueue(priv->gve_wq);
2324
2325 abort_with_netdev:
2326 free_netdev(dev);
2327
2328 abort_with_db_bar:
2329 pci_iounmap(pdev, db_bar);
2330
2331 abort_with_reg_bar:
2332 pci_iounmap(pdev, reg_bar);
2333
2334 abort_with_pci_region:
2335 pci_release_regions(pdev);
2336
2337 abort_with_enabled:
2338 pci_disable_device(pdev);
2339 return err;
2340 }
2341
gve_remove(struct pci_dev * pdev)2342 static void gve_remove(struct pci_dev *pdev)
2343 {
2344 struct net_device *netdev = pci_get_drvdata(pdev);
2345 struct gve_priv *priv = netdev_priv(netdev);
2346 __be32 __iomem *db_bar = priv->db_bar2;
2347 void __iomem *reg_bar = priv->reg_bar0;
2348
2349 unregister_netdev(netdev);
2350 gve_teardown_priv_resources(priv);
2351 destroy_workqueue(priv->gve_wq);
2352 free_netdev(netdev);
2353 pci_iounmap(pdev, db_bar);
2354 pci_iounmap(pdev, reg_bar);
2355 pci_release_regions(pdev);
2356 pci_disable_device(pdev);
2357 }
2358
gve_shutdown(struct pci_dev * pdev)2359 static void gve_shutdown(struct pci_dev *pdev)
2360 {
2361 struct net_device *netdev = pci_get_drvdata(pdev);
2362 struct gve_priv *priv = netdev_priv(netdev);
2363 bool was_up = netif_carrier_ok(priv->dev);
2364
2365 rtnl_lock();
2366 if (was_up && gve_close(priv->dev)) {
2367 /* If the dev was up, attempt to close, if close fails, reset */
2368 gve_reset_and_teardown(priv, was_up);
2369 } else {
2370 /* If the dev wasn't up or close worked, finish tearing down */
2371 gve_teardown_priv_resources(priv);
2372 }
2373 rtnl_unlock();
2374 }
2375
2376 #ifdef CONFIG_PM
/* PCI suspend callback.
 *
 * Counts the suspend, tears the device down the same way gve_shutdown()
 * does, and records in up_before_suspend whether the interface was up.
 * @state is not used.
 *
 * Always returns 0.
 */
static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct gve_priv *priv = netdev_priv(pci_get_drvdata(pdev));
	bool was_up = netif_carrier_ok(priv->dev);

	priv->suspend_cnt++;

	rtnl_lock();
	if (!was_up || !gve_close(priv->dev)) {
		/* The dev wasn't up, or close worked: finish tearing down */
		gve_teardown_priv_resources(priv);
	} else {
		/* The dev was up and close failed: reset instead */
		gve_reset_and_teardown(priv, was_up);
	}
	priv->up_before_suspend = was_up;
	rtnl_unlock();

	return 0;
}
2396
gve_resume(struct pci_dev * pdev)2397 static int gve_resume(struct pci_dev *pdev)
2398 {
2399 struct net_device *netdev = pci_get_drvdata(pdev);
2400 struct gve_priv *priv = netdev_priv(netdev);
2401 int err;
2402
2403 priv->resume_cnt++;
2404 rtnl_lock();
2405 err = gve_reset_recovery(priv, priv->up_before_suspend);
2406 rtnl_unlock();
2407 return err;
2408 }
2409 #endif /* CONFIG_PM */
2410
2411 static const struct pci_device_id gve_id_table[] = {
2412 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
2413 { }
2414 };
2415
2416 static struct pci_driver gve_driver = {
2417 .name = gve_driver_name,
2418 .id_table = gve_id_table,
2419 .probe = gve_probe,
2420 .remove = gve_remove,
2421 .shutdown = gve_shutdown,
2422 #ifdef CONFIG_PM
2423 .suspend = gve_suspend,
2424 .resume = gve_resume,
2425 #endif
2426 };
2427
2428 module_pci_driver(gve_driver);
2429
2430 MODULE_DEVICE_TABLE(pci, gve_id_table);
2431 MODULE_AUTHOR("Google, Inc.");
2432 MODULE_DESCRIPTION("Google Virtual NIC Driver");
2433 MODULE_LICENSE("Dual MIT/GPL");
2434 MODULE_VERSION(GVE_VERSION);
2435