1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3 *
4 * Copyright (C) 2015-2021 Google, Inc.
5 */
6
7 #include <linux/bpf.h>
8 #include <linux/cpumask.h>
9 #include <linux/etherdevice.h>
10 #include <linux/filter.h>
11 #include <linux/interrupt.h>
12 #include <linux/module.h>
13 #include <linux/pci.h>
14 #include <linux/sched.h>
15 #include <linux/timer.h>
16 #include <linux/workqueue.h>
17 #include <linux/utsname.h>
18 #include <linux/version.h>
19 #include <net/sch_generic.h>
20 #include <net/xdp_sock_drv.h>
21 #include "gve.h"
22 #include "gve_dqo.h"
23 #include "gve_adminq.h"
24 #include "gve_register.h"
25
26 #define GVE_DEFAULT_RX_COPYBREAK (256)
27
28 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
29 #define GVE_VERSION "1.0.0"
30 #define GVE_VERSION_PREFIX "GVE-"
31
32 // Minimum amount of time between queue kicks in msec (10 seconds)
33 #define MIN_TX_TIMEOUT_GAP (1000 * 10)
34
35 char gve_driver_name[] = "gve";
36 const char gve_version_str[] = GVE_VERSION;
37 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
38
39 static int gve_verify_driver_compatibility(struct gve_priv *priv)
40 {
41 int err;
42 struct gve_driver_info *driver_info;
43 dma_addr_t driver_info_bus;
44
45 driver_info = dma_alloc_coherent(&priv->pdev->dev,
46 sizeof(struct gve_driver_info),
47 &driver_info_bus, GFP_KERNEL);
48 if (!driver_info)
49 return -ENOMEM;
50
51 *driver_info = (struct gve_driver_info) {
52 .os_type = 1, /* Linux */
53 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
54 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
55 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
56 .driver_capability_flags = {
57 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
58 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
59 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
60 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
61 },
62 };
63 strscpy(driver_info->os_version_str1, utsname()->release,
64 sizeof(driver_info->os_version_str1));
65 strscpy(driver_info->os_version_str2, utsname()->version,
66 sizeof(driver_info->os_version_str2));
67
68 err = gve_adminq_verify_driver_compatibility(priv,
69 sizeof(struct gve_driver_info),
70 driver_info_bus);
71
72 /* It's ok if the device doesn't support this */
73 if (err == -EOPNOTSUPP)
74 err = 0;
75
76 dma_free_coherent(&priv->pdev->dev,
77 sizeof(struct gve_driver_info),
78 driver_info, driver_info_bus);
79 return err;
80 }
81
82 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
83 {
84 struct gve_priv *priv = netdev_priv(dev);
85
86 if (gve_is_gqi(priv))
87 return gve_tx(skb, dev);
88 else
89 return gve_tx_dqo(skb, dev);
90 }
91
92 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
93 {
94 struct gve_priv *priv = netdev_priv(dev);
95 unsigned int start;
96 u64 packets, bytes;
97 int num_tx_queues;
98 int ring;
99
100 num_tx_queues = gve_num_tx_queues(priv);
101 if (priv->rx) {
102 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
103 do {
104 start =
105 u64_stats_fetch_begin(&priv->rx[ring].statss);
106 packets = priv->rx[ring].rpackets;
107 bytes = priv->rx[ring].rbytes;
108 } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
109 start));
110 s->rx_packets += packets;
111 s->rx_bytes += bytes;
112 }
113 }
114 if (priv->tx) {
115 for (ring = 0; ring < num_tx_queues; ring++) {
116 do {
117 start =
118 u64_stats_fetch_begin(&priv->tx[ring].statss);
119 packets = priv->tx[ring].pkt_done;
120 bytes = priv->tx[ring].bytes_done;
121 } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
122 start));
123 s->tx_packets += packets;
124 s->tx_bytes += bytes;
125 }
126 }
127 }
128
129 static int gve_alloc_counter_array(struct gve_priv *priv)
130 {
131 priv->counter_array =
132 dma_alloc_coherent(&priv->pdev->dev,
133 priv->num_event_counters *
134 sizeof(*priv->counter_array),
135 &priv->counter_array_bus, GFP_KERNEL);
136 if (!priv->counter_array)
137 return -ENOMEM;
138
139 return 0;
140 }
141
142 static void gve_free_counter_array(struct gve_priv *priv)
143 {
144 if (!priv->counter_array)
145 return;
146
147 dma_free_coherent(&priv->pdev->dev,
148 priv->num_event_counters *
149 sizeof(*priv->counter_array),
150 priv->counter_array, priv->counter_array_bus);
151 priv->counter_array = NULL;
152 }
153
154 /* NIC requests to report stats */
155 static void gve_stats_report_task(struct work_struct *work)
156 {
157 struct gve_priv *priv = container_of(work, struct gve_priv,
158 stats_report_task);
159 if (gve_get_do_report_stats(priv)) {
160 gve_handle_report_stats(priv);
161 gve_clear_do_report_stats(priv);
162 }
163 }
164
165 static void gve_stats_report_schedule(struct gve_priv *priv)
166 {
167 if (!gve_get_probe_in_progress(priv) &&
168 !gve_get_reset_in_progress(priv)) {
169 gve_set_do_report_stats(priv);
170 queue_work(priv->gve_wq, &priv->stats_report_task);
171 }
172 }
173
174 static void gve_stats_report_timer(struct timer_list *t)
175 {
176 struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
177
178 mod_timer(&priv->stats_report_timer,
179 round_jiffies(jiffies +
180 msecs_to_jiffies(priv->stats_report_timer_period)));
181 gve_stats_report_schedule(priv);
182 }
183
184 static int gve_alloc_stats_report(struct gve_priv *priv)
185 {
186 int tx_stats_num, rx_stats_num;
187
188 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
189 gve_num_tx_queues(priv);
190 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
191 priv->rx_cfg.num_queues;
192 priv->stats_report_len = struct_size(priv->stats_report, stats,
193 size_add(tx_stats_num, rx_stats_num));
194 priv->stats_report =
195 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
196 &priv->stats_report_bus, GFP_KERNEL);
197 if (!priv->stats_report)
198 return -ENOMEM;
199 /* Set up timer for the report-stats task */
200 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
201 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
202 return 0;
203 }
204
205 static void gve_free_stats_report(struct gve_priv *priv)
206 {
207 if (!priv->stats_report)
208 return;
209
210 del_timer_sync(&priv->stats_report_timer);
211 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
212 priv->stats_report, priv->stats_report_bus);
213 priv->stats_report = NULL;
214 }
215
216 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
217 {
218 struct gve_priv *priv = arg;
219
220 queue_work(priv->gve_wq, &priv->service_task);
221 return IRQ_HANDLED;
222 }
223
224 static irqreturn_t gve_intr(int irq, void *arg)
225 {
226 struct gve_notify_block *block = arg;
227 struct gve_priv *priv = block->priv;
228
229 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
230 napi_schedule_irqoff(&block->napi);
231 return IRQ_HANDLED;
232 }
233
234 static irqreturn_t gve_intr_dqo(int irq, void *arg)
235 {
236 struct gve_notify_block *block = arg;
237
238 /* Interrupts are automatically masked */
239 napi_schedule_irqoff(&block->napi);
240 return IRQ_HANDLED;
241 }
242
243 static int gve_napi_poll(struct napi_struct *napi, int budget)
244 {
245 struct gve_notify_block *block;
246 __be32 __iomem *irq_doorbell;
247 bool reschedule = false;
248 struct gve_priv *priv;
249 int work_done = 0;
250
251 block = container_of(napi, struct gve_notify_block, napi);
252 priv = block->priv;
253
254 if (block->tx) {
255 if (block->tx->q_num < priv->tx_cfg.num_queues)
256 reschedule |= gve_tx_poll(block, budget);
257 else if (budget)
258 reschedule |= gve_xdp_poll(block, budget);
259 }
260
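/* A budget of zero signals a netpoll context: the TX completion work above
 * may run, but RX must not be processed.
 */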
261 if (!budget)
262 return 0;
263
264 if (block->rx) {
265 work_done = gve_rx_poll(block, budget);
266 reschedule |= work_done == budget;
267 }
268
269 if (reschedule)
270 return budget;
271
272 /* Complete processing - don't unmask irq if busy polling is enabled */
273 if (likely(napi_complete_done(napi, work_done))) {
274 irq_doorbell = gve_irq_doorbell(priv, block);
275 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
276
277 /* Ensure the IRQ ACK is visible before we check pending work.
278 * If the queue had issued updates, they will now be visible.
279 */
280 mb();
281
282 if (block->tx)
283 reschedule |= gve_tx_clean_pending(priv, block->tx);
284 if (block->rx)
285 reschedule |= gve_rx_work_pending(block->rx);
286
287 if (reschedule && napi_reschedule(napi))
288 iowrite32be(GVE_IRQ_MASK, irq_doorbell);
289 }
290 return work_done;
291 }
292
293 static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
294 {
295 struct gve_notify_block *block =
296 container_of(napi, struct gve_notify_block, napi);
297 struct gve_priv *priv = block->priv;
298 bool reschedule = false;
299 int work_done = 0;
300
301 if (block->tx)
302 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
303
304 if (!budget)
305 return 0;
306
307 if (block->rx) {
308 work_done = gve_rx_poll_dqo(block, budget);
309 reschedule |= work_done == budget;
310 }
311
312 if (reschedule)
313 return budget;
314
315 if (likely(napi_complete_done(napi, work_done))) {
316 /* Enable interrupts again.
317 *
318 * We don't need to repoll afterwards because HW supports the
319 * PCI MSI-X PBA feature.
320 *
321 * Another interrupt would be triggered if a new event came in
322 * since the last one.
323 */
324 gve_write_irq_doorbell_dqo(priv, block,
325 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
326 }
327
328 return work_done;
329 }
330
331 static int gve_alloc_notify_blocks(struct gve_priv *priv)
332 {
333 int num_vecs_requested = priv->num_ntfy_blks + 1;
334 unsigned int active_cpus;
335 int vecs_enabled;
336 int i, j;
337 int err;
338
339 priv->msix_vectors = kvcalloc(num_vecs_requested,
340 sizeof(*priv->msix_vectors), GFP_KERNEL);
341 if (!priv->msix_vectors)
342 return -ENOMEM;
343 for (i = 0; i < num_vecs_requested; i++)
344 priv->msix_vectors[i].entry = i;
345 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
346 GVE_MIN_MSIX, num_vecs_requested);
347 if (vecs_enabled < 0) {
348 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
349 GVE_MIN_MSIX, vecs_enabled);
350 err = vecs_enabled;
351 goto abort_with_msix_vectors;
352 }
353 if (vecs_enabled != num_vecs_requested) {
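/* Fewer vectors than requested: keep one for management and round the
 * remainder down to an even count so TX and RX get equal shares.
 */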
354 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
355 int vecs_per_type = new_num_ntfy_blks / 2;
356 int vecs_left = new_num_ntfy_blks % 2;
357
358 priv->num_ntfy_blks = new_num_ntfy_blks;
359 priv->mgmt_msix_idx = priv->num_ntfy_blks;
360 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
361 vecs_per_type);
362 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
363 vecs_per_type + vecs_left);
364 dev_err(&priv->pdev->dev,
365 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
366 vecs_enabled, priv->tx_cfg.max_queues,
367 priv->rx_cfg.max_queues);
368 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
369 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
370 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
371 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
372 }
373 /* Half the notification blocks go to TX and half to RX */
374 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
375
376 /* Setup Management Vector - the last vector */
377 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
378 pci_name(priv->pdev));
379 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
380 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
381 if (err) {
382 dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
383 goto abort_with_msix_enabled;
384 }
385 priv->irq_db_indices =
386 dma_alloc_coherent(&priv->pdev->dev,
387 priv->num_ntfy_blks *
388 sizeof(*priv->irq_db_indices),
389 &priv->irq_db_indices_bus, GFP_KERNEL);
390 if (!priv->irq_db_indices) {
391 err = -ENOMEM;
392 goto abort_with_mgmt_vector;
393 }
394
395 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
396 sizeof(*priv->ntfy_blocks), GFP_KERNEL);
397 if (!priv->ntfy_blocks) {
398 err = -ENOMEM;
399 goto abort_with_irq_db_indices;
400 }
401
402 /* Setup the other blocks - the first n-1 vectors */
403 for (i = 0; i < priv->num_ntfy_blks; i++) {
404 struct gve_notify_block *block = &priv->ntfy_blocks[i];
405 int msix_idx = i;
406
407 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
408 i, pci_name(priv->pdev));
409 block->priv = priv;
410 err = request_irq(priv->msix_vectors[msix_idx].vector,
411 gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
412 0, block->name, block);
413 if (err) {
414 dev_err(&priv->pdev->dev,
415 "Failed to receive msix vector %d\n", i);
416 goto abort_with_some_ntfy_blocks;
417 }
418 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
419 get_cpu_mask(i % active_cpus));
420 block->irq_db_index = &priv->irq_db_indices[i].index;
421 }
422 return 0;
423 abort_with_some_ntfy_blocks:
424 for (j = 0; j < i; j++) {
425 struct gve_notify_block *block = &priv->ntfy_blocks[j];
426 int msix_idx = j;
427
428 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
429 NULL);
430 free_irq(priv->msix_vectors[msix_idx].vector, block);
431 }
432 kvfree(priv->ntfy_blocks);
433 priv->ntfy_blocks = NULL;
434 abort_with_irq_db_indices:
435 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
436 sizeof(*priv->irq_db_indices),
437 priv->irq_db_indices, priv->irq_db_indices_bus);
438 priv->irq_db_indices = NULL;
439 abort_with_mgmt_vector:
440 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
441 abort_with_msix_enabled:
442 pci_disable_msix(priv->pdev);
443 abort_with_msix_vectors:
444 kvfree(priv->msix_vectors);
445 priv->msix_vectors = NULL;
446 return err;
447 }
448
449 static void gve_free_notify_blocks(struct gve_priv *priv)
450 {
451 int i;
452
453 if (!priv->msix_vectors)
454 return;
455
456 /* Free the irqs */
457 for (i = 0; i < priv->num_ntfy_blks; i++) {
458 struct gve_notify_block *block = &priv->ntfy_blocks[i];
459 int msix_idx = i;
460
461 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
462 NULL);
463 free_irq(priv->msix_vectors[msix_idx].vector, block);
464 }
465 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
466 kvfree(priv->ntfy_blocks);
467 priv->ntfy_blocks = NULL;
468 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
469 sizeof(*priv->irq_db_indices),
470 priv->irq_db_indices, priv->irq_db_indices_bus);
471 priv->irq_db_indices = NULL;
472 pci_disable_msix(priv->pdev);
473 kvfree(priv->msix_vectors);
474 priv->msix_vectors = NULL;
475 }
476
477 static int gve_setup_device_resources(struct gve_priv *priv)
478 {
479 int err;
480
481 err = gve_alloc_counter_array(priv);
482 if (err)
483 return err;
484 err = gve_alloc_notify_blocks(priv);
485 if (err)
486 goto abort_with_counter;
487 err = gve_alloc_stats_report(priv);
488 if (err)
489 goto abort_with_ntfy_blocks;
490 err = gve_adminq_configure_device_resources(priv,
491 priv->counter_array_bus,
492 priv->num_event_counters,
493 priv->irq_db_indices_bus,
494 priv->num_ntfy_blks);
495 if (unlikely(err)) {
496 dev_err(&priv->pdev->dev,
497 "could not setup device_resources: err=%d\n", err);
498 err = -ENXIO;
499 goto abort_with_stats_report;
500 }
501
502 if (!gve_is_gqi(priv)) {
503 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
504 GFP_KERNEL);
505 if (!priv->ptype_lut_dqo) {
506 err = -ENOMEM;
507 goto abort_with_stats_report;
508 }
509 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
510 if (err) {
511 dev_err(&priv->pdev->dev,
512 "Failed to get ptype map: err=%d\n", err);
513 goto abort_with_ptype_lut;
514 }
515 }
516
517 err = gve_adminq_report_stats(priv, priv->stats_report_len,
518 priv->stats_report_bus,
519 GVE_STATS_REPORT_TIMER_PERIOD);
520 if (err)
521 dev_err(&priv->pdev->dev,
522 "Failed to report stats: err=%d\n", err);
523 gve_set_device_resources_ok(priv);
524 return 0;
525
526 abort_with_ptype_lut:
527 kvfree(priv->ptype_lut_dqo);
528 priv->ptype_lut_dqo = NULL;
529 abort_with_stats_report:
530 gve_free_stats_report(priv);
531 abort_with_ntfy_blocks:
532 gve_free_notify_blocks(priv);
533 abort_with_counter:
534 gve_free_counter_array(priv);
535
536 return err;
537 }
538
539 static void gve_trigger_reset(struct gve_priv *priv);
540
541 static void gve_teardown_device_resources(struct gve_priv *priv)
542 {
543 int err;
544
545 /* Tell device its resources are being freed */
546 if (gve_get_device_resources_ok(priv)) {
547 /* detach the stats report */
548 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
549 if (err) {
550 dev_err(&priv->pdev->dev,
551 "Failed to detach stats report: err=%d\n", err);
552 gve_trigger_reset(priv);
553 }
554 err = gve_adminq_deconfigure_device_resources(priv);
555 if (err) {
556 dev_err(&priv->pdev->dev,
557 "Could not deconfigure device resources: err=%d\n",
558 err);
559 gve_trigger_reset(priv);
560 }
561 }
562
563 kvfree(priv->ptype_lut_dqo);
564 priv->ptype_lut_dqo = NULL;
565
566 gve_free_counter_array(priv);
567 gve_free_notify_blocks(priv);
568 gve_free_stats_report(priv);
569 gve_clear_device_resources_ok(priv);
570 }
571
572 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
573 int (*gve_poll)(struct napi_struct *, int))
574 {
575 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
576
577 netif_napi_add(priv->dev, &block->napi, gve_poll);
578 }
579
580 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
581 {
582 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
583
584 netif_napi_del(&block->napi);
585 }
586
587 static int gve_register_xdp_qpls(struct gve_priv *priv)
588 {
589 int start_id;
590 int err;
591 int i;
592
593 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
594 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
595 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
596 if (err) {
597 netif_err(priv, drv, priv->dev,
598 "failed to register queue page list %d\n",
599 priv->qpls[i].id);
600 /* This failure will trigger a reset - no need to clean
601 * up
602 */
603 return err;
604 }
605 }
606 return 0;
607 }
608
609 static int gve_register_qpls(struct gve_priv *priv)
610 {
611 int start_id;
612 int err;
613 int i;
614
615 start_id = gve_tx_start_qpl_id(priv);
616 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
617 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
618 if (err) {
619 netif_err(priv, drv, priv->dev,
620 "failed to register queue page list %d\n",
621 priv->qpls[i].id);
622 /* This failure will trigger a reset - no need to clean
623 * up
624 */
625 return err;
626 }
627 }
628
629 start_id = gve_rx_start_qpl_id(priv);
630 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
631 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
632 if (err) {
633 netif_err(priv, drv, priv->dev,
634 "failed to register queue page list %d\n",
635 priv->qpls[i].id);
636 /* This failure will trigger a reset - no need to clean
637 * up
638 */
639 return err;
640 }
641 }
642 return 0;
643 }
644
645 static int gve_unregister_xdp_qpls(struct gve_priv *priv)
646 {
647 int start_id;
648 int err;
649 int i;
650
651 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
652 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
653 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
654 /* This failure will trigger a reset - no need to clean up */
655 if (err) {
656 netif_err(priv, drv, priv->dev,
657 "Failed to unregister queue page list %d\n",
658 priv->qpls[i].id);
659 return err;
660 }
661 }
662 return 0;
663 }
664
665 static int gve_unregister_qpls(struct gve_priv *priv)
666 {
667 int start_id;
668 int err;
669 int i;
670
671 start_id = gve_tx_start_qpl_id(priv);
672 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
673 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
674 /* This failure will trigger a reset - no need to clean up */
675 if (err) {
676 netif_err(priv, drv, priv->dev,
677 "Failed to unregister queue page list %d\n",
678 priv->qpls[i].id);
679 return err;
680 }
681 }
682
683 start_id = gve_rx_start_qpl_id(priv);
684 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
685 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
686 /* This failure will trigger a reset - no need to clean up */
687 if (err) {
688 netif_err(priv, drv, priv->dev,
689 "Failed to unregister queue page list %d\n",
690 priv->qpls[i].id);
691 return err;
692 }
693 }
694 return 0;
695 }
696
697 static int gve_create_xdp_rings(struct gve_priv *priv)
698 {
699 int err;
700
701 err = gve_adminq_create_tx_queues(priv,
702 gve_xdp_tx_start_queue_id(priv),
703 priv->num_xdp_queues);
704 if (err) {
705 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
706 priv->num_xdp_queues);
707 /* This failure will trigger a reset - no need to clean
708 * up
709 */
710 return err;
711 }
712 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
713 priv->num_xdp_queues);
714
715 return 0;
716 }
717
718 static int gve_create_rings(struct gve_priv *priv)
719 {
720 int num_tx_queues = gve_num_tx_queues(priv);
721 int err;
722 int i;
723
724 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
725 if (err) {
726 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
727 num_tx_queues);
728 /* This failure will trigger a reset - no need to clean
729 * up
730 */
731 return err;
732 }
733 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
734 num_tx_queues);
735
736 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
737 if (err) {
738 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
739 priv->rx_cfg.num_queues);
740 /* This failure will trigger a reset - no need to clean
741 * up
742 */
743 return err;
744 }
745 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
746 priv->rx_cfg.num_queues);
747
748 if (gve_is_gqi(priv)) {
749 /* Rx data ring has been prefilled with packet buffers at queue
750 * allocation time.
751 *
752 * Write the doorbell to provide descriptor slots and packet
753 * buffers to the NIC.
754 */
755 for (i = 0; i < priv->rx_cfg.num_queues; i++)
756 gve_rx_write_doorbell(priv, &priv->rx[i]);
757 } else {
758 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
759 /* Post buffers and ring doorbell. */
760 gve_rx_post_buffers_dqo(&priv->rx[i]);
761 }
762 }
763
764 return 0;
765 }
766
767 static void add_napi_init_xdp_sync_stats(struct gve_priv *priv,
768 int (*napi_poll)(struct napi_struct *napi,
769 int budget))
770 {
771 int start_id = gve_xdp_tx_start_queue_id(priv);
772 int i;
773
774 /* Add xdp tx napi & init sync stats*/
775 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
776 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
777
778 u64_stats_init(&priv->tx[i].statss);
779 priv->tx[i].ntfy_id = ntfy_idx;
780 gve_add_napi(priv, ntfy_idx, napi_poll);
781 }
782 }
783
784 static void add_napi_init_sync_stats(struct gve_priv *priv,
785 int (*napi_poll)(struct napi_struct *napi,
786 int budget))
787 {
788 int i;
789
790 /* Add tx napi & init sync stats*/
791 for (i = 0; i < gve_num_tx_queues(priv); i++) {
792 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
793
794 u64_stats_init(&priv->tx[i].statss);
795 priv->tx[i].ntfy_id = ntfy_idx;
796 gve_add_napi(priv, ntfy_idx, napi_poll);
797 }
798 /* Add rx napi & init sync stats*/
799 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
800 int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
801
802 u64_stats_init(&priv->rx[i].statss);
803 priv->rx[i].ntfy_id = ntfy_idx;
804 gve_add_napi(priv, ntfy_idx, napi_poll);
805 }
806 }
807
808 static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings)
809 {
810 if (gve_is_gqi(priv)) {
811 gve_tx_free_rings_gqi(priv, start_id, num_rings);
812 } else {
813 gve_tx_free_rings_dqo(priv);
814 }
815 }
816
817 static int gve_alloc_xdp_rings(struct gve_priv *priv)
818 {
819 int start_id;
820 int err = 0;
821
822 if (!priv->num_xdp_queues)
823 return 0;
824
825 start_id = gve_xdp_tx_start_queue_id(priv);
826 err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues);
827 if (err)
828 return err;
829 add_napi_init_xdp_sync_stats(priv, gve_napi_poll);
830
831 return 0;
832 }
833
834 static int gve_alloc_rings(struct gve_priv *priv)
835 {
836 int err;
837
838 /* Setup tx rings */
839 priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx),
840 GFP_KERNEL);
841 if (!priv->tx)
842 return -ENOMEM;
843
844 if (gve_is_gqi(priv))
845 err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv));
846 else
847 err = gve_tx_alloc_rings_dqo(priv);
848 if (err)
849 goto free_tx;
850
851 /* Setup rx rings */
852 priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx),
853 GFP_KERNEL);
854 if (!priv->rx) {
855 err = -ENOMEM;
856 goto free_tx_queue;
857 }
858
859 if (gve_is_gqi(priv))
860 err = gve_rx_alloc_rings(priv);
861 else
862 err = gve_rx_alloc_rings_dqo(priv);
863 if (err)
864 goto free_rx;
865
866 if (gve_is_gqi(priv))
867 add_napi_init_sync_stats(priv, gve_napi_poll);
868 else
869 add_napi_init_sync_stats(priv, gve_napi_poll_dqo);
870
871 return 0;
872
873 free_rx:
874 kvfree(priv->rx);
875 priv->rx = NULL;
876 free_tx_queue:
877 gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv));
878 free_tx:
879 kvfree(priv->tx);
880 priv->tx = NULL;
881 return err;
882 }
883
884 static int gve_destroy_xdp_rings(struct gve_priv *priv)
885 {
886 int start_id;
887 int err;
888
889 start_id = gve_xdp_tx_start_queue_id(priv);
890 err = gve_adminq_destroy_tx_queues(priv,
891 start_id,
892 priv->num_xdp_queues);
893 if (err) {
894 netif_err(priv, drv, priv->dev,
895 "failed to destroy XDP queues\n");
896 /* This failure will trigger a reset - no need to clean up */
897 return err;
898 }
899 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
900
901 return 0;
902 }
903
904 static int gve_destroy_rings(struct gve_priv *priv)
905 {
906 int num_tx_queues = gve_num_tx_queues(priv);
907 int err;
908
909 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
910 if (err) {
911 netif_err(priv, drv, priv->dev,
912 "failed to destroy tx queues\n");
913 /* This failure will trigger a reset - no need to clean up */
914 return err;
915 }
916 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
917 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
918 if (err) {
919 netif_err(priv, drv, priv->dev,
920 "failed to destroy rx queues\n");
921 /* This failure will trigger a reset - no need to clean up */
922 return err;
923 }
924 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
925 return 0;
926 }
927
928 static void gve_rx_free_rings(struct gve_priv *priv)
929 {
930 if (gve_is_gqi(priv))
931 gve_rx_free_rings_gqi(priv);
932 else
933 gve_rx_free_rings_dqo(priv);
934 }
935
936 static void gve_free_xdp_rings(struct gve_priv *priv)
937 {
938 int ntfy_idx, start_id;
939 int i;
940
941 start_id = gve_xdp_tx_start_queue_id(priv);
942 if (priv->tx) {
943 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
944 ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
945 gve_remove_napi(priv, ntfy_idx);
946 }
947 gve_tx_free_rings(priv, start_id, priv->num_xdp_queues);
948 }
949 }
950
951 static void gve_free_rings(struct gve_priv *priv)
952 {
953 int num_tx_queues = gve_num_tx_queues(priv);
954 int ntfy_idx;
955 int i;
956
957 if (priv->tx) {
958 for (i = 0; i < num_tx_queues; i++) {
959 ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
960 gve_remove_napi(priv, ntfy_idx);
961 }
962 gve_tx_free_rings(priv, 0, num_tx_queues);
963 kvfree(priv->tx);
964 priv->tx = NULL;
965 }
966 if (priv->rx) {
967 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
968 ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
969 gve_remove_napi(priv, ntfy_idx);
970 }
971 gve_rx_free_rings(priv);
972 kvfree(priv->rx);
973 priv->rx = NULL;
974 }
975 }
976
977 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
978 struct page **page, dma_addr_t *dma,
979 enum dma_data_direction dir, gfp_t gfp_flags)
980 {
981 *page = alloc_page(gfp_flags);
982 if (!*page) {
983 priv->page_alloc_fail++;
984 return -ENOMEM;
985 }
986 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
987 if (dma_mapping_error(dev, *dma)) {
988 priv->dma_mapping_error++;
989 put_page(*page);
990 return -ENOMEM;
991 }
992 return 0;
993 }
994
995 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
996 int pages)
997 {
998 struct gve_queue_page_list *qpl = &priv->qpls[id];
999 int err;
1000 int i;
1001
1002 if (pages + priv->num_registered_pages > priv->max_registered_pages) {
1003 netif_err(priv, drv, priv->dev,
1004 "Reached max number of registered pages %llu > %llu\n",
1005 pages + priv->num_registered_pages,
1006 priv->max_registered_pages);
1007 return -EINVAL;
1008 }
1009
1010 qpl->id = id;
1011 qpl->num_entries = 0;
1012 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
1013 /* caller handles clean up */
1014 if (!qpl->pages)
1015 return -ENOMEM;
1016 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
1017 /* caller handles clean up */
1018 if (!qpl->page_buses)
1019 return -ENOMEM;
1020
1021 for (i = 0; i < pages; i++) {
1022 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
1023 &qpl->page_buses[i],
1024 gve_qpl_dma_dir(priv, id), GFP_KERNEL);
1025 /* caller handles clean up */
1026 if (err)
1027 return -ENOMEM;
1028 qpl->num_entries++;
1029 }
1030 priv->num_registered_pages += pages;
1031
1032 return 0;
1033 }
1034
1035 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
1036 enum dma_data_direction dir)
1037 {
1038 if (!dma_mapping_error(dev, dma))
1039 dma_unmap_page(dev, dma, PAGE_SIZE, dir);
1040 if (page)
1041 put_page(page);
1042 }
1043
1044 static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
1045 {
1046 struct gve_queue_page_list *qpl = &priv->qpls[id];
1047 int i;
1048
1049 if (!qpl->pages)
1050 return;
1051 if (!qpl->page_buses)
1052 goto free_pages;
1053
1054 for (i = 0; i < qpl->num_entries; i++)
1055 gve_free_page(&priv->pdev->dev, qpl->pages[i],
1056 qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
1057
1058 kvfree(qpl->page_buses);
1059 qpl->page_buses = NULL;
1060 free_pages:
1061 kvfree(qpl->pages);
1062 qpl->pages = NULL;
1063 priv->num_registered_pages -= qpl->num_entries;
1064 }
1065
1066 static int gve_alloc_xdp_qpls(struct gve_priv *priv)
1067 {
1068 int start_id;
1069 int i, j;
1070 int err;
1071
1072 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
1073 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
1074 err = gve_alloc_queue_page_list(priv, i,
1075 priv->tx_pages_per_qpl);
1076 if (err)
1077 goto free_qpls;
1078 }
1079
1080 return 0;
1081
1082 free_qpls:
1083 for (j = start_id; j <= i; j++)
1084 gve_free_queue_page_list(priv, j);
1085 return err;
1086 }
1087
1088 static int gve_alloc_qpls(struct gve_priv *priv)
1089 {
1090 int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
1091 int page_count;
1092 int start_id;
1093 int i, j;
1094 int err;
1095
1096 if (!gve_is_qpl(priv))
1097 return 0;
1098
1099 priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
1100 if (!priv->qpls)
1101 return -ENOMEM;
1102
1103 start_id = gve_tx_start_qpl_id(priv);
1104 page_count = priv->tx_pages_per_qpl;
1105 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
1106 err = gve_alloc_queue_page_list(priv, i,
1107 page_count);
1108 if (err)
1109 goto free_qpls;
1110 }
1111
1112 start_id = gve_rx_start_qpl_id(priv);
1113
1114 /* For GQI_QPL the number of pages allocated has a 1:1 relationship with
1115 * the number of descriptors. For DQO, more pages than descriptors are
1116 * required because of out-of-order completions.
1117 */
1118 page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
1119 priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
1120 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
1121 err = gve_alloc_queue_page_list(priv, i,
1122 page_count);
1123 if (err)
1124 goto free_qpls;
1125 }
1126
1127 priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) *
1128 sizeof(unsigned long) * BITS_PER_BYTE;
1129 priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
1130 sizeof(unsigned long), GFP_KERNEL);
1131 if (!priv->qpl_cfg.qpl_id_map) {
1132 err = -ENOMEM;
1133 goto free_qpls;
1134 }
1135
1136 return 0;
1137
1138 free_qpls:
1139 for (j = 0; j <= i; j++)
1140 gve_free_queue_page_list(priv, j);
1141 kvfree(priv->qpls);
1142 priv->qpls = NULL;
1143 return err;
1144 }
1145
1146 static void gve_free_xdp_qpls(struct gve_priv *priv)
1147 {
1148 int start_id;
1149 int i;
1150
1151 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
1152 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++)
1153 gve_free_queue_page_list(priv, i);
1154 }
1155
1156 static void gve_free_qpls(struct gve_priv *priv)
1157 {
1158 int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
1159 int i;
1160
1161 if (!priv->qpls)
1162 return;
1163
1164 kvfree(priv->qpl_cfg.qpl_id_map);
1165 priv->qpl_cfg.qpl_id_map = NULL;
1166
1167 for (i = 0; i < max_queues; i++)
1168 gve_free_queue_page_list(priv, i);
1169
1170 kvfree(priv->qpls);
1171 priv->qpls = NULL;
1172 }
1173
1174 /* Use this to schedule a reset when the device is capable of continuing
1175 * to handle other requests in its current state. If it is not, do a reset
1176 * in thread instead.
1177 */
1178 void gve_schedule_reset(struct gve_priv *priv)
1179 {
1180 gve_set_do_reset(priv);
1181 queue_work(priv->gve_wq, &priv->service_task);
1182 }
1183
1184 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
1185 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
1186 static void gve_turndown(struct gve_priv *priv);
1187 static void gve_turnup(struct gve_priv *priv);
1188
1189 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
1190 {
1191 struct napi_struct *napi;
1192 struct gve_rx_ring *rx;
1193 int err = 0;
1194 int i, j;
1195 u32 tx_qid;
1196
1197 if (!priv->num_xdp_queues)
1198 return 0;
1199
1200 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1201 rx = &priv->rx[i];
1202 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1203
1204 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
1205 napi->napi_id);
1206 if (err)
1207 goto err;
1208 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
1209 MEM_TYPE_PAGE_SHARED, NULL);
1210 if (err)
1211 goto err;
1212 rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
1213 if (rx->xsk_pool) {
1214 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
1215 napi->napi_id);
1216 if (err)
1217 goto err;
1218 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1219 MEM_TYPE_XSK_BUFF_POOL, NULL);
1220 if (err)
1221 goto err;
1222 xsk_pool_set_rxq_info(rx->xsk_pool,
1223 &rx->xsk_rxq);
1224 }
1225 }
1226
1227 for (i = 0; i < priv->num_xdp_queues; i++) {
1228 tx_qid = gve_xdp_tx_queue_id(priv, i);
1229 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
1230 }
1231 return 0;
1232
1233 err:
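/* Unwind every rxq info registered so far, including any partial
 * registration on queue i.
 */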
1234 for (j = i; j >= 0; j--) {
1235 rx = &priv->rx[j];
1236 if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
1237 xdp_rxq_info_unreg(&rx->xdp_rxq);
1238 if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1239 xdp_rxq_info_unreg(&rx->xsk_rxq);
1240 }
1241 return err;
1242 }
1243
1244 static void gve_unreg_xdp_info(struct gve_priv *priv)
1245 {
1246 int i, tx_qid;
1247
1248 if (!priv->num_xdp_queues)
1249 return;
1250
1251 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1252 struct gve_rx_ring *rx = &priv->rx[i];
1253
1254 xdp_rxq_info_unreg(&rx->xdp_rxq);
1255 if (rx->xsk_pool) {
1256 xdp_rxq_info_unreg(&rx->xsk_rxq);
1257 rx->xsk_pool = NULL;
1258 }
1259 }
1260
1261 for (i = 0; i < priv->num_xdp_queues; i++) {
1262 tx_qid = gve_xdp_tx_queue_id(priv, i);
1263 priv->tx[tx_qid].xsk_pool = NULL;
1264 }
1265 }
1266
1267 static void gve_drain_page_cache(struct gve_priv *priv)
1268 {
1269 struct page_frag_cache *nc;
1270 int i;
1271
1272 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1273 nc = &priv->rx[i].page_cache;
1274 if (nc->va) {
1275 __page_frag_cache_drain(virt_to_page(nc->va),
1276 nc->pagecnt_bias);
1277 nc->va = NULL;
1278 }
1279 }
1280 }
1281
1282 static int gve_open(struct net_device *dev)
1283 {
1284 struct gve_priv *priv = netdev_priv(dev);
1285 int err;
1286
1287 if (priv->xdp_prog)
1288 priv->num_xdp_queues = priv->rx_cfg.num_queues;
1289 else
1290 priv->num_xdp_queues = 0;
1291
1292 err = gve_alloc_qpls(priv);
1293 if (err)
1294 return err;
1295
1296 err = gve_alloc_rings(priv);
1297 if (err)
1298 goto free_qpls;
1299
1300 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
1301 if (err)
1302 goto free_rings;
1303 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
1304 if (err)
1305 goto free_rings;
1306
1307 err = gve_reg_xdp_info(priv, dev);
1308 if (err)
1309 goto free_rings;
1310
1311 err = gve_register_qpls(priv);
1312 if (err)
1313 goto reset;
1314
1315 if (!gve_is_gqi(priv)) {
1316 /* Hard code this for now. This may be tuned in the future for
1317 * performance.
1318 */
1319 priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
1320 }
1321 err = gve_create_rings(priv);
1322 if (err)
1323 goto reset;
1324
1325 gve_set_device_rings_ok(priv);
1326
1327 if (gve_get_report_stats(priv))
1328 mod_timer(&priv->stats_report_timer,
1329 round_jiffies(jiffies +
1330 msecs_to_jiffies(priv->stats_report_timer_period)));
1331
1332 gve_turnup(priv);
1333 queue_work(priv->gve_wq, &priv->service_task);
1334 priv->interface_up_cnt++;
1335 return 0;
1336
1337 free_rings:
1338 gve_free_rings(priv);
1339 free_qpls:
1340 gve_free_qpls(priv);
1341 return err;
1342
1343 reset:
1344 /* This must have been called from a reset due to the rtnl lock
1345 * so just return at this point.
1346 */
1347 if (gve_get_reset_in_progress(priv))
1348 return err;
1349 /* Otherwise reset before returning */
1350 gve_reset_and_teardown(priv, true);
1351 /* if this fails there is nothing we can do so just ignore the return */
1352 gve_reset_recovery(priv, false);
1353 /* return the original error */
1354 return err;
1355 }
1356
1357 static int gve_close(struct net_device *dev)
1358 {
1359 struct gve_priv *priv = netdev_priv(dev);
1360 int err;
1361
1362 netif_carrier_off(dev);
1363 if (gve_get_device_rings_ok(priv)) {
1364 gve_turndown(priv);
1365 gve_drain_page_cache(priv);
1366 err = gve_destroy_rings(priv);
1367 if (err)
1368 goto err;
1369 err = gve_unregister_qpls(priv);
1370 if (err)
1371 goto err;
1372 gve_clear_device_rings_ok(priv);
1373 }
1374 del_timer_sync(&priv->stats_report_timer);
1375
1376 gve_unreg_xdp_info(priv);
1377 gve_free_rings(priv);
1378 gve_free_qpls(priv);
1379 priv->interface_down_cnt++;
1380 return 0;
1381
1382 err:
1383 /* This must have been called from a reset due to the rtnl lock
1384 * so just return at this point.
1385 */
1386 if (gve_get_reset_in_progress(priv))
1387 return err;
1388 /* Otherwise reset before returning */
1389 gve_reset_and_teardown(priv, true);
1390 return gve_reset_recovery(priv, false);
1391 }
1392
1393 static int gve_remove_xdp_queues(struct gve_priv *priv)
1394 {
1395 int err;
1396
1397 err = gve_destroy_xdp_rings(priv);
1398 if (err)
1399 return err;
1400
1401 err = gve_unregister_xdp_qpls(priv);
1402 if (err)
1403 return err;
1404
1405 gve_unreg_xdp_info(priv);
1406 gve_free_xdp_rings(priv);
1407 gve_free_xdp_qpls(priv);
1408 priv->num_xdp_queues = 0;
1409 return 0;
1410 }
1411
1412 static int gve_add_xdp_queues(struct gve_priv *priv)
1413 {
1414 int err;
1415
1416 priv->num_xdp_queues = priv->tx_cfg.num_queues;
1417
1418 err = gve_alloc_xdp_qpls(priv);
1419 if (err)
1420 goto err;
1421
1422 err = gve_alloc_xdp_rings(priv);
1423 if (err)
1424 goto free_xdp_qpls;
1425
1426 err = gve_reg_xdp_info(priv, priv->dev);
1427 if (err)
1428 goto free_xdp_rings;
1429
1430 err = gve_register_xdp_qpls(priv);
1431 if (err)
1432 goto free_xdp_rings;
1433
1434 err = gve_create_xdp_rings(priv);
1435 if (err)
1436 goto free_xdp_rings;
1437
1438 return 0;
1439
1440 free_xdp_rings:
1441 gve_free_xdp_rings(priv);
1442 free_xdp_qpls:
1443 gve_free_xdp_qpls(priv);
1444 err:
1445 priv->num_xdp_queues = 0;
1446 return err;
1447 }
1448
1449 static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1450 {
1451 if (!gve_get_napi_enabled(priv))
1452 return;
1453
1454 if (link_status == netif_carrier_ok(priv->dev))
1455 return;
1456
1457 if (link_status) {
1458 netdev_info(priv->dev, "Device link is up.\n");
1459 netif_carrier_on(priv->dev);
1460 } else {
1461 netdev_info(priv->dev, "Device link is down.\n");
1462 netif_carrier_off(priv->dev);
1463 }
1464 }
1465
1466 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
1467 struct netlink_ext_ack *extack)
1468 {
1469 struct bpf_prog *old_prog;
1470 int err = 0;
1471 u32 status;
1472
1473 old_prog = READ_ONCE(priv->xdp_prog);
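/* If the carrier is down, just swap the program pointer and drop the old one. */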
1474 if (!netif_carrier_ok(priv->dev)) {
1475 WRITE_ONCE(priv->xdp_prog, prog);
1476 if (old_prog)
1477 bpf_prog_put(old_prog);
1478 return 0;
1479 }
1480
1481 gve_turndown(priv);
1482 if (!old_prog && prog) {
1483 // Allocate XDP TX queues if an XDP program is
1484 // being installed
1485 err = gve_add_xdp_queues(priv);
1486 if (err)
1487 goto out;
1488 } else if (old_prog && !prog) {
1489 // Remove XDP TX queues if an XDP program is
1490 // being uninstalled
1491 err = gve_remove_xdp_queues(priv);
1492 if (err)
1493 goto out;
1494 }
1495 WRITE_ONCE(priv->xdp_prog, prog);
1496 if (old_prog)
1497 bpf_prog_put(old_prog);
1498
1499 out:
1500 gve_turnup(priv);
1501 status = ioread32be(&priv->reg_bar0->device_status);
1502 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1503 return err;
1504 }
1505
1506 static int gve_xsk_pool_enable(struct net_device *dev,
1507 struct xsk_buff_pool *pool,
1508 u16 qid)
1509 {
1510 struct gve_priv *priv = netdev_priv(dev);
1511 struct napi_struct *napi;
1512 struct gve_rx_ring *rx;
1513 int tx_qid;
1514 int err;
1515
1516 if (qid >= priv->rx_cfg.num_queues) {
1517 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
1518 return -EINVAL;
1519 }
1520 if (xsk_pool_get_rx_frame_size(pool) <
1521 priv->dev->max_mtu + sizeof(struct ethhdr)) {
1522 dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
1523 return -EINVAL;
1524 }
1525
1526 err = xsk_pool_dma_map(pool, &priv->pdev->dev,
1527 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1528 if (err)
1529 return err;
1530
1531 /* If XDP prog is not installed, return */
1532 if (!priv->xdp_prog)
1533 return 0;
1534
1535 rx = &priv->rx[qid];
1536 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1537 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
1538 if (err)
1539 goto err;
1540
1541 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1542 MEM_TYPE_XSK_BUFF_POOL, NULL);
1543 if (err)
1544 goto err;
1545
1546 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
1547 rx->xsk_pool = pool;
1548
1549 tx_qid = gve_xdp_tx_queue_id(priv, qid);
1550 priv->tx[tx_qid].xsk_pool = pool;
1551
1552 return 0;
1553 err:
1554 if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1555 xdp_rxq_info_unreg(&rx->xsk_rxq);
1556
1557 xsk_pool_dma_unmap(pool,
1558 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1559 return err;
1560 }
1561
1562 static int gve_xsk_pool_disable(struct net_device *dev,
1563 u16 qid)
1564 {
1565 struct gve_priv *priv = netdev_priv(dev);
1566 struct napi_struct *napi_rx;
1567 struct napi_struct *napi_tx;
1568 struct xsk_buff_pool *pool;
1569 int tx_qid;
1570
1571 pool = xsk_get_pool_from_qid(dev, qid);
1572 if (!pool)
1573 return -EINVAL;
1574 if (qid >= priv->rx_cfg.num_queues)
1575 return -EINVAL;
1576
1577 /* If XDP prog is not installed, unmap DMA and return */
1578 if (!priv->xdp_prog)
1579 goto done;
1580
1581 tx_qid = gve_xdp_tx_queue_id(priv, qid);
1582 if (!netif_running(dev)) {
1583 priv->rx[qid].xsk_pool = NULL;
1584 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1585 priv->tx[tx_qid].xsk_pool = NULL;
1586 goto done;
1587 }
1588
1589 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
1590 napi_disable(napi_rx); /* make sure current rx poll is done */
1591
1592 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
1593 napi_disable(napi_tx); /* make sure current tx poll is done */
1594
1595 priv->rx[qid].xsk_pool = NULL;
1596 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1597 priv->tx[tx_qid].xsk_pool = NULL;
1598 smp_mb(); /* Make sure it is visible to the workers on datapath */
1599
1600 napi_enable(napi_rx);
1601 if (gve_rx_work_pending(&priv->rx[qid]))
1602 napi_schedule(napi_rx);
1603
1604 napi_enable(napi_tx);
1605 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
1606 napi_schedule(napi_tx);
1607
1608 done:
1609 xsk_pool_dma_unmap(pool,
1610 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1611 return 0;
1612 }
1613
1614 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
1615 {
1616 struct gve_priv *priv = netdev_priv(dev);
1617 int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
1618
1619 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
1620 return -EINVAL;
1621
1622 if (flags & XDP_WAKEUP_TX) {
1623 struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
1624 struct napi_struct *napi =
1625 &priv->ntfy_blocks[tx->ntfy_id].napi;
1626
1627 if (!napi_if_scheduled_mark_missed(napi)) {
1628 /* Call local_bh_enable to trigger SoftIRQ processing */
1629 local_bh_disable();
1630 napi_schedule(napi);
1631 local_bh_enable();
1632 }
1633
1634 tx->xdp_xsk_wakeup++;
1635 }
1636
1637 return 0;
1638 }
1639
1640 static int verify_xdp_configuration(struct net_device *dev)
1641 {
1642 struct gve_priv *priv = netdev_priv(dev);
1643
1644 if (dev->features & NETIF_F_LRO) {
1645 netdev_warn(dev, "XDP is not supported when LRO is on.\n");
1646 return -EOPNOTSUPP;
1647 }
1648
1649 if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
1650 netdev_warn(dev, "XDP is not supported in mode %d.\n",
1651 priv->queue_format);
1652 return -EOPNOTSUPP;
1653 }
1654
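/* GQI QPL RX buffers are half a page, so the frame (MTU plus Ethernet
 * header plus RX pad) must fit in PAGE_SIZE / 2.
 */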
1655 if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) {
1656 netdev_warn(dev, "XDP is not supported for mtu %d.\n",
1657 dev->mtu);
1658 return -EOPNOTSUPP;
1659 }
1660
1661 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
1662 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
1663 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
1664 priv->rx_cfg.num_queues,
1665 priv->tx_cfg.num_queues,
1666 priv->tx_cfg.max_queues);
1667 return -EINVAL;
1668 }
1669 return 0;
1670 }
1671
1672 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1673 {
1674 struct gve_priv *priv = netdev_priv(dev);
1675 int err;
1676
1677 err = verify_xdp_configuration(dev);
1678 if (err)
1679 return err;
1680 switch (xdp->command) {
1681 case XDP_SETUP_PROG:
1682 return gve_set_xdp(priv, xdp->prog, xdp->extack);
1683 case XDP_SETUP_XSK_POOL:
1684 if (xdp->xsk.pool)
1685 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
1686 else
1687 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
1688 default:
1689 return -EINVAL;
1690 }
1691 }
1692
1693 int gve_adjust_queues(struct gve_priv *priv,
1694 struct gve_queue_config new_rx_config,
1695 struct gve_queue_config new_tx_config)
1696 {
1697 int err;
1698
1699 if (netif_carrier_ok(priv->dev)) {
1700 /* To make this process as simple as possible we teardown the
1701 * device, set the new configuration, and then bring the device
1702 * up again.
1703 */
1704 err = gve_close(priv->dev);
1705 /* we have already tried to reset in close,
1706 * just fail at this point
1707 */
1708 if (err)
1709 return err;
1710 priv->tx_cfg = new_tx_config;
1711 priv->rx_cfg = new_rx_config;
1712
1713 err = gve_open(priv->dev);
1714 if (err)
1715 goto err;
1716
1717 return 0;
1718 }
1719 /* Set the config for the next up. */
1720 priv->tx_cfg = new_tx_config;
1721 priv->rx_cfg = new_rx_config;
1722
1723 return 0;
1724 err:
1725 netif_err(priv, drv, priv->dev,
1726 "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
1727 gve_turndown(priv);
1728 return err;
1729 }
1730
1731 static void gve_turndown(struct gve_priv *priv)
1732 {
1733 int idx;
1734
1735 if (netif_carrier_ok(priv->dev))
1736 netif_carrier_off(priv->dev);
1737
1738 if (!gve_get_napi_enabled(priv))
1739 return;
1740
1741 /* Disable napi to prevent more work from coming in */
1742 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1743 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1744 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1745
1746 napi_disable(&block->napi);
1747 }
1748 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1749 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1750 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1751
1752 napi_disable(&block->napi);
1753 }
1754
1755 /* Stop tx queues */
1756 netif_tx_disable(priv->dev);
1757
1758 gve_clear_napi_enabled(priv);
1759 gve_clear_report_stats(priv);
1760 }
1761
1762 static void gve_turnup(struct gve_priv *priv)
1763 {
1764 int idx;
1765
1766 /* Start the tx queues */
1767 netif_tx_start_all_queues(priv->dev);
1768
1769 /* Enable napi and unmask interrupts for all queues */
1770 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1771 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1772 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1773
1774 napi_enable(&block->napi);
1775 if (gve_is_gqi(priv)) {
1776 iowrite32be(0, gve_irq_doorbell(priv, block));
1777 } else {
1778 gve_set_itr_coalesce_usecs_dqo(priv, block,
1779 priv->tx_coalesce_usecs);
1780 }
1781 }
1782 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1783 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1784 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1785
1786 napi_enable(&block->napi);
1787 if (gve_is_gqi(priv)) {
1788 iowrite32be(0, gve_irq_doorbell(priv, block));
1789 } else {
1790 gve_set_itr_coalesce_usecs_dqo(priv, block,
1791 priv->rx_coalesce_usecs);
1792 }
1793 }
1794
1795 gve_set_napi_enabled(priv);
1796 }
1797
1798 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
1799 {
1800 struct gve_notify_block *block;
1801 struct gve_tx_ring *tx = NULL;
1802 struct gve_priv *priv;
1803 u32 last_nic_done;
1804 u32 current_time;
1805 u32 ntfy_idx;
1806
1807 netdev_info(dev, "Timeout on tx queue, %d", txqueue);
1808 priv = netdev_priv(dev);
1809 if (txqueue > priv->tx_cfg.num_queues)
1810 goto reset;
1811
1812 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
1813 if (ntfy_idx >= priv->num_ntfy_blks)
1814 goto reset;
1815
1816 block = &priv->ntfy_blocks[ntfy_idx];
1817 tx = block->tx;
1818
1819 current_time = jiffies_to_msecs(jiffies);
1820 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
1821 goto reset;
1822
1823 /* Check to see if there are missed completions, which will allow us to
1824 * kick the queue.
1825 */
1826 last_nic_done = gve_tx_load_event_counter(priv, tx);
1827 if (last_nic_done - tx->done) {
1828 netdev_info(dev, "Kicking queue %d", txqueue);
1829 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
1830 napi_schedule(&block->napi);
1831 tx->last_kick_msec = current_time;
1832 goto out;
1833 } // Else reset.
1834
1835 reset:
1836 gve_schedule_reset(priv);
1837
1838 out:
1839 if (tx)
1840 tx->queue_timeout++;
1841 priv->tx_timeo_cnt++;
1842 }
1843
1844 static int gve_set_features(struct net_device *netdev,
1845 netdev_features_t features)
1846 {
1847 const netdev_features_t orig_features = netdev->features;
1848 struct gve_priv *priv = netdev_priv(netdev);
1849 int err;
1850
1851 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
1852 netdev->features ^= NETIF_F_LRO;
1853 if (netif_carrier_ok(netdev)) {
1854 /* To make this process as simple as possible we
1855 * teardown the device, set the new configuration,
1856 * and then bring the device up again.
1857 */
1858 err = gve_close(netdev);
1859 /* We have already tried to reset in close, just fail
1860 * at this point.
1861 */
1862 if (err)
1863 goto err;
1864
1865 err = gve_open(netdev);
1866 if (err)
1867 goto err;
1868 }
1869 }
1870
1871 return 0;
1872 err:
1873 /* Reverts the change on error. */
1874 netdev->features = orig_features;
1875 netif_err(priv, drv, netdev,
1876 "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
1877 return err;
1878 }
1879
1880 static const struct net_device_ops gve_netdev_ops = {
1881 .ndo_start_xmit = gve_start_xmit,
1882 .ndo_open = gve_open,
1883 .ndo_stop = gve_close,
1884 .ndo_get_stats64 = gve_get_stats,
1885 .ndo_tx_timeout = gve_tx_timeout,
1886 .ndo_set_features = gve_set_features,
1887 .ndo_bpf = gve_xdp,
1888 .ndo_xdp_xmit = gve_xdp_xmit,
1889 .ndo_xsk_wakeup = gve_xsk_wakeup,
1890 };
1891
1892 static void gve_handle_status(struct gve_priv *priv, u32 status)
1893 {
1894 if (GVE_DEVICE_STATUS_RESET_MASK & status) {
1895 dev_info(&priv->pdev->dev, "Device requested reset.\n");
1896 gve_set_do_reset(priv);
1897 }
1898 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
1899 priv->stats_report_trigger_cnt++;
1900 gve_set_do_report_stats(priv);
1901 }
1902 }
1903
1904 static void gve_handle_reset(struct gve_priv *priv)
1905 {
1906 /* A service task will be scheduled at the end of probe to catch any
1907 * resets that need to happen, and we don't want to reset until
1908 * probe is done.
1909 */
1910 if (gve_get_probe_in_progress(priv))
1911 return;
1912
1913 if (gve_get_do_reset(priv)) {
1914 rtnl_lock();
1915 gve_reset(priv, false);
1916 rtnl_unlock();
1917 }
1918 }
1919
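/* Fill the stats report buffer with per-queue TX and RX counters (big-endian,
 * for the device to consume) and bump the written_count sequence.
 */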
void gve_handle_report_stats(struct gve_priv *priv)
{
	struct stats *stats = priv->stats_report->stats;
	int idx, stats_idx = 0;
	unsigned int start = 0;
	u64 tx_bytes;

	if (!gve_get_report_stats(priv))
		return;

	be64_add_cpu(&priv->stats_report->written_count, 1);
	/* tx stats */
	if (priv->tx) {
		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
			u32 last_completion = 0;
			u32 tx_frames = 0;

			/* DQO doesn't currently support these metrics. */
			if (gve_is_gqi(priv)) {
				last_completion = priv->tx[idx].done;
				tx_frames = priv->tx[idx].req;
			}

			do {
				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
				tx_bytes = priv->tx[idx].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_WAKE_CNT),
				.value = cpu_to_be64(priv->tx[idx].wake_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_STOP_CNT),
				.value = cpu_to_be64(priv->tx[idx].stop_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
				.value = cpu_to_be64(tx_frames),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_BYTES_SENT),
				.value = cpu_to_be64(tx_bytes),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
				.value = cpu_to_be64(last_completion),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
	/* rx stats */
	if (priv->rx) {
		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
				.value = cpu_to_be64(priv->rx[0].fill_cnt),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
}

/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     service_task);
	u32 status = ioread32be(&priv->reg_bar0->device_status);

	gve_handle_status(priv, status);

	gve_handle_reset(priv);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}

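/* Advertise XDP features: only the GQI-QPL queue format supports XDP
 * (basic, redirect, ndo_xmit and AF_XDP zero-copy); other formats expose none.
 */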
static void gve_set_netdev_xdp_features(struct gve_priv *priv)
{
	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
		priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
		priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
		priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
		priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
	} else {
		priv->dev->xdp_features = 0;
	}
}

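/* Set up the admin queue, verify driver compatibility, query the device
 * description and size the TX/RX queue configuration, then allocate device
 * resources. On the reset path (skip_describe_device) the previously
 * discovered configuration is reused.
 */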
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	err = gve_verify_driver_compatibility(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not verify driver compatibility: err=%d\n", err);
		goto err;
	}

	if (skip_describe_device)
		goto setup_device;

	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	/* Big TCP is only supported on DQ */
	if (!gve_is_gqi(priv))
		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);

	priv->num_registered_pages = 0;
	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

	if (!gve_is_gqi(priv)) {
		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
	}

setup_device:
	gve_set_netdev_xdp_features(priv);
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}

static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}

static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}

static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

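/* Full device reset: tear down (gracefully when attempt_teardown is set),
 * then re-initialize and, if the interface was up, bring it back up.
 */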
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_carrier_ok(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to tear down normally, just turn down and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails, reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}

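/* Report the driver version to the device: write the prefix, then the version
 * string, then a terminating newline, one byte at a time into the register.
 */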
static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}

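/* PCI probe: enable the device, map the register and doorbell BARs, allocate
 * the netdev, initialize gve_priv and the device, and register with the stack.
 */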
static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	err = pci_request_regions(pdev, gve_driver_name);
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		err = -ENOMEM;
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;

	/* Set default and supported features.
	 *
	 * Features might be set in other locations as well (such as
	 * `gve_adminq_describe_device`).
	 */
	dev->hw_features = NETIF_F_HIGHDMA;
	dev->hw_features |= NETIF_F_SG;
	dev->hw_features |= NETIF_F_HW_CSUM;
	dev->hw_features |= NETIF_F_TSO;
	dev->hw_features |= NETIF_F_TSO6;
	dev->hw_features |= NETIF_F_TSO_ECN;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_RXHASH;
	dev->features = dev->hw_features;
	dev->watchdog_timeo = 5 * HZ;
	dev->min_mtu = ETH_MIN_MTU;
	netif_carrier_off(dev);

	priv = netdev_priv(dev);
	priv->dev = dev;
	priv->pdev = pdev;
	priv->msg_enable = DEFAULT_MSG_LEVEL;
	priv->reg_bar0 = reg_bar;
	priv->db_bar2 = db_bar;
	priv->service_task_flags = 0x0;
	priv->state_flags = 0x0;
	priv->ethtool_flags = 0x0;

	gve_set_probe_in_progress(priv);
	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue");
		err = -ENOMEM;
		goto abort_with_netdev;
	}
	INIT_WORK(&priv->service_task, gve_service_task);
	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
	priv->tx_cfg.max_queues = max_tx_queues;
	priv->rx_cfg.max_queues = max_rx_queues;

	err = gve_init_priv(priv, false);
	if (err)
		goto abort_with_wq;

	err = register_netdev(dev);
	if (err)
		goto abort_with_gve_init;

	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
	gve_clear_probe_in_progress(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	return 0;

abort_with_gve_init:
	gve_teardown_priv_resources(priv);

abort_with_wq:
	destroy_workqueue(priv->gve_wq);

abort_with_netdev:
	free_netdev(dev);

abort_with_db_bar:
	pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
	pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
	pci_release_regions(pdev);

abort_with_enabled:
	pci_disable_device(pdev);
	return err;
}

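/* Undo gve_probe: unregister the netdev and release all device and PCI
 * resources in reverse order of allocation.
 */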
static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}

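/* PCI shutdown hook: close the interface if it was up (resetting if close
 * fails) and release the device's private resources.
 */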
static void gve_shutdown(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_carrier_ok(priv->dev);

	rtnl_lock();
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close; if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	rtnl_unlock();
}

#ifdef CONFIG_PM
static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_carrier_ok(priv->dev);

	priv->suspend_cnt++;
	rtnl_lock();
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close; if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	priv->up_before_suspend = was_up;
	rtnl_unlock();
	return 0;
}

static int gve_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	priv->resume_cnt++;
	rtnl_lock();
	err = gve_reset_recovery(priv, priv->up_before_suspend);
	rtnl_unlock();
	return err;
}
#endif /* CONFIG_PM */

static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gve_driver = {
	.name = gve_driver_name,
	.id_table = gve_id_table,
	.probe = gve_probe,
	.remove = gve_remove,
	.shutdown = gve_shutdown,
#ifdef CONFIG_PM
	.suspend = gve_suspend,
	.resume = gve_resume,
#endif
};

module_pci_driver(gve_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("Google Virtual NIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);