1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
2 
3 #include <linux/dma-mapping.h>
4 #include <linux/ip.h>
5 #include <linux/pci.h>
6 #include <linux/skbuff.h>
7 #include <linux/tcp.h>
8 #include <uapi/linux/udp.h>
9 #include "funeth.h"
10 #include "funeth_ktls.h"
11 #include "funeth_txrx.h"
12 #include "funeth_trace.h"
13 #include "fun_queue.h"
14 
/* XDP Tx reclaim tuning used by fun_xdp_tx(): when fewer than
 * FUN_XDP_CLEAN_THRES descriptors are available, up to FUN_XDP_CLEAN_BATCH
 * completed descriptors are reclaimed before the next frame is queued.
 */
#define FUN_XDP_CLEAN_THRES 32
#define FUN_XDP_CLEAN_BATCH 16
17 
18 /* DMA-map a packet and return the (length, DMA_address) pairs for its
19  * segments. If a mapping error occurs -ENOMEM is returned.
20  */
21 static int map_skb(const struct sk_buff *skb, struct device *dev,
22 		   dma_addr_t *addr, unsigned int *len)
23 {
24 	const struct skb_shared_info *si;
25 	const skb_frag_t *fp, *end;
26 
27 	*len = skb_headlen(skb);
28 	*addr = dma_map_single(dev, skb->data, *len, DMA_TO_DEVICE);
29 	if (dma_mapping_error(dev, *addr))
30 		return -ENOMEM;
31 
32 	si = skb_shinfo(skb);
33 	end = &si->frags[si->nr_frags];
34 
35 	for (fp = si->frags; fp < end; fp++) {
36 		*++len = skb_frag_size(fp);
37 		*++addr = skb_frag_dma_map(dev, fp, 0, *len, DMA_TO_DEVICE);
38 		if (dma_mapping_error(dev, *addr))
39 			goto unwind;
40 	}
41 	return 0;
42 
43 unwind:
44 	while (fp-- > si->frags)
45 		dma_unmap_page(dev, *--addr, skb_frag_size(fp), DMA_TO_DEVICE);
46 
47 	dma_unmap_single(dev, addr[-1], skb_headlen(skb), DMA_TO_DEVICE);
48 	return -ENOMEM;
49 }
50 
51 /* Return the address just past the end of a Tx queue's descriptor ring.
52  * It exploits the fact that the HW writeback area is just after the end
53  * of the descriptor ring.
54  */
55 static void *txq_end(const struct funeth_txq *q)
56 {
57 	return (void *)q->hw_wb;
58 }
59 
/* Return the number of bytes between @p, an address inside a Tx ring, and
 * the end of that ring. The result is 0 when @p is exactly at the end.
 */
static unsigned int txq_to_end(const struct funeth_txq *q, void *p)
{
	const char *ring_end = txq_end(q);
	const char *pos = p;

	return ring_end - pos;
}
67 
68 /* Return the number of Tx descriptors occupied by a Tx request. */
69 static unsigned int tx_req_ndesc(const struct fun_eth_tx_req *req)
70 {
71 	return DIV_ROUND_UP(req->len8, FUNETH_SQE_SIZE / 8);
72 }
73 
74 static __be16 tcp_hdr_doff_flags(const struct tcphdr *th)
75 {
76 	return *(__be16 *)&tcp_flag_word(th);
77 }
78 
79 static struct sk_buff *fun_tls_tx(struct sk_buff *skb, struct funeth_txq *q,
80 				  unsigned int *tls_len)
81 {
82 #if IS_ENABLED(CONFIG_TLS_DEVICE)
83 	const struct fun_ktls_tx_ctx *tls_ctx;
84 	u32 datalen, seq;
85 
86 	datalen = skb->len - skb_tcp_all_headers(skb);
87 	if (!datalen)
88 		return skb;
89 
90 	if (likely(!tls_offload_tx_resync_pending(skb->sk))) {
91 		seq = ntohl(tcp_hdr(skb)->seq);
92 		tls_ctx = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
93 
94 		if (likely(tls_ctx->next_seq == seq)) {
95 			*tls_len = datalen;
96 			return skb;
97 		}
98 		if (seq - tls_ctx->next_seq < U32_MAX / 4) {
99 			tls_offload_tx_resync_request(skb->sk, seq,
100 						      tls_ctx->next_seq);
101 		}
102 	}
103 
104 	FUN_QSTAT_INC(q, tx_tls_fallback);
105 	skb = tls_encrypt_skb(skb);
106 	if (!skb)
107 		FUN_QSTAT_INC(q, tx_tls_drops);
108 
109 	return skb;
110 #else
111 	return NULL;
112 #endif
113 }
114 
/* Write as many descriptors as needed for the supplied skb starting at the
 * current producer location. The caller has made certain enough descriptors
 * are available.
 *
 * @skb: packet to describe; it is DMA-mapped here and recorded in q->info[]
 * @q: Tx queue whose ring receives the request
 * @tls_len: if nonzero, a TLS section is appended to the request and the
 *           socket's TLS driver context is advanced by this many bytes
 *
 * The producer counter is not advanced here; the caller does so using the
 * return value.
 *
 * Returns the number of descriptors written, 0 on error.
 */
static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q,
				   unsigned int tls_len)
{
	unsigned int extra_bytes = 0, extra_pkts = 0;
	unsigned int idx = q->prod_cnt & q->mask;
	const struct skb_shared_info *shinfo;
	unsigned int lens[MAX_SKB_FRAGS + 1];
	dma_addr_t addrs[MAX_SKB_FRAGS + 1];
	struct fun_eth_tx_req *req;
	struct fun_dataop_gl *gle;
	const struct tcphdr *th;
	unsigned int ngle, i;
	unsigned int l4_hlen;
	u16 flags;

	/* Map before touching the ring so a mapping failure leaves the
	 * descriptor at idx unmodified.
	 */
	if (unlikely(map_skb(skb, q->dma_dev, addrs, lens))) {
		FUN_QSTAT_INC(q, tx_map_err);
		return 0;
	}

	/* Fixed part of the request; len8 is filled in once the number of
	 * gather entries is known.
	 */
	req = fun_tx_desc_addr(q, idx);
	req->op = FUN_ETH_OP_TX;
	req->len8 = 0;
	req->flags = 0;
	req->suboff8 = offsetof(struct fun_eth_tx_req, dataop);
	req->repr_idn = 0;
	req->encap_proto = 0;

	/* Offload section: segmentation (tunnelled TCP, UDP, plain TCP),
	 * checksum only, or no offload at all.
	 */
	shinfo = skb_shinfo(skb);
	if (likely(shinfo->gso_size)) {
		if (skb->encapsulation) {
			u16 ol4_ofst;

			flags = FUN_ETH_OUTER_EN | FUN_ETH_INNER_LSO |
				FUN_ETH_UPDATE_INNER_L4_CKSUM |
				FUN_ETH_UPDATE_OUTER_L3_LEN;
			if (shinfo->gso_type & (SKB_GSO_UDP_TUNNEL |
						SKB_GSO_UDP_TUNNEL_CSUM)) {
				flags |= FUN_ETH_UPDATE_OUTER_L4_LEN |
					 FUN_ETH_OUTER_UDP;
				if (shinfo->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)
					flags |= FUN_ETH_UPDATE_OUTER_L4_CKSUM;
				ol4_ofst = skb_transport_offset(skb);
			} else {
				ol4_ofst = skb_inner_network_offset(skb);
			}

			/* Outer IP version */
			if (ip_hdr(skb)->version == 4)
				flags |= FUN_ETH_UPDATE_OUTER_L3_CKSUM;
			else
				flags |= FUN_ETH_OUTER_IPV6;

			/* Inner IP version, when an inner network header
			 * is set.
			 */
			if (skb->inner_network_header) {
				if (inner_ip_hdr(skb)->version == 4)
					flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM |
						 FUN_ETH_UPDATE_INNER_L3_LEN;
				else
					flags |= FUN_ETH_INNER_IPV6 |
						 FUN_ETH_UPDATE_INNER_L3_LEN;
			}
			th = inner_tcp_hdr(skb);
			l4_hlen = __tcp_hdrlen(th);
			fun_eth_offload_init(&req->offload, flags,
					     shinfo->gso_size,
					     tcp_hdr_doff_flags(th), 0,
					     skb_inner_network_offset(skb),
					     skb_inner_transport_offset(skb),
					     skb_network_offset(skb), ol4_ofst);
			FUN_QSTAT_INC(q, tx_encap_tso);
		} else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
			flags = FUN_ETH_INNER_LSO | FUN_ETH_INNER_UDP |
				FUN_ETH_UPDATE_INNER_L4_CKSUM |
				FUN_ETH_UPDATE_INNER_L4_LEN |
				FUN_ETH_UPDATE_INNER_L3_LEN;

			if (ip_hdr(skb)->version == 4)
				flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM;
			else
				flags |= FUN_ETH_INNER_IPV6;

			/* The UDP header length is passed in the argument
			 * position that carries the TCP doff/flags word in
			 * the TCP cases; l4_hlen << 10 equals
			 * (l4_hlen / 4) << 12, i.e. the length in 32-bit
			 * words placed in the top 4 bits.
			 * NOTE(review): presumably HW parses it like a TCP
			 * data offset - confirm against the device spec.
			 */
			l4_hlen = sizeof(struct udphdr);
			fun_eth_offload_init(&req->offload, flags,
					     shinfo->gso_size,
					     cpu_to_be16(l4_hlen << 10), 0,
					     skb_network_offset(skb),
					     skb_transport_offset(skb), 0, 0);
			FUN_QSTAT_INC(q, tx_uso);
		} else {
			/* HW considers one set of headers as inner */
			flags = FUN_ETH_INNER_LSO |
				FUN_ETH_UPDATE_INNER_L4_CKSUM |
				FUN_ETH_UPDATE_INNER_L3_LEN;
			if (shinfo->gso_type & SKB_GSO_TCPV6)
				flags |= FUN_ETH_INNER_IPV6;
			else
				flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM;
			th = tcp_hdr(skb);
			l4_hlen = __tcp_hdrlen(th);
			fun_eth_offload_init(&req->offload, flags,
					     shinfo->gso_size,
					     tcp_hdr_doff_flags(th), 0,
					     skb_network_offset(skb),
					     skb_transport_offset(skb), 0, 0);
			FUN_QSTAT_INC(q, tx_tso);
		}

		/* Every segment of a GSO packet is counted as checksummed. */
		u64_stats_update_begin(&q->syncp);
		q->stats.tx_cso += shinfo->gso_segs;
		u64_stats_update_end(&q->syncp);

		/* Statistics adjustment for the segments beyond the first:
		 * each adds one packet and (inner_l4_off + l4_hlen) bytes.
		 * NOTE(review): inner_l4_off is presumably the inner L4
		 * header offset stored by fun_eth_offload_init(), making the
		 * sum the per-segment header size - confirm.
		 */
		extra_pkts = shinfo->gso_segs - 1;
		extra_bytes = (be16_to_cpu(req->offload.inner_l4_off) +
			       l4_hlen) * extra_pkts;
	} else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		flags = FUN_ETH_UPDATE_INNER_L4_CKSUM;
		/* A checksum at the UDP 'check' offset marks the packet
		 * as UDP.
		 */
		if (skb->csum_offset == offsetof(struct udphdr, check))
			flags |= FUN_ETH_INNER_UDP;
		fun_eth_offload_init(&req->offload, flags, 0, 0, 0, 0,
				     skb_checksum_start_offset(skb), 0, 0);
		FUN_QSTAT_INC(q, tx_cso);
	} else {
		fun_eth_offload_init(&req->offload, 0, 0, 0, 0, 0, 0, 0, 0);
	}

	/* One gather entry for the linear part plus one per fragment. */
	ngle = shinfo->nr_frags + 1;
	req->len8 = (sizeof(*req) + ngle * sizeof(*gle)) / 8;
	req->dataop = FUN_DATAOP_HDR_INIT(ngle, 0, ngle, 0, skb->len);

	/* Write gather entries until they are all written or the end of the
	 * ring is reached.
	 */
	for (i = 0, gle = (struct fun_dataop_gl *)req->dataop.imm;
	     i < ngle && txq_to_end(q, gle); i++, gle++)
		fun_dataop_gl_init(gle, 0, 0, lens[i], addrs[i]);

	/* At the end of the ring: wrap to its start and write whatever
	 * entries remain (possibly none).
	 */
	if (txq_to_end(q, gle) == 0) {
		gle = (struct fun_dataop_gl *)q->desc;
		for ( ; i < ngle; i++, gle++)
			fun_dataop_gl_init(gle, 0, 0, lens[i], addrs[i]);
	}

	if (IS_ENABLED(CONFIG_TLS_DEVICE) && unlikely(tls_len)) {
		/* The TLS section goes right after the last gather entry. */
		struct fun_eth_tls *tls = (struct fun_eth_tls *)gle;
		struct fun_ktls_tx_ctx *tls_ctx;

		req->len8 += FUNETH_TLS_SZ / 8;
		req->flags = cpu_to_be16(FUN_ETH_TX_TLS);

		tls_ctx = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
		tls->tlsid = tls_ctx->tlsid;
		/* Advance the sequence number expected by fun_tls_tx(). */
		tls_ctx->next_seq += tls_len;

		u64_stats_update_begin(&q->syncp);
		q->stats.tx_tls_bytes += tls_len;
		q->stats.tx_tls_pkts += 1 + extra_pkts;
		u64_stats_update_end(&q->syncp);
	}

	u64_stats_update_begin(&q->syncp);
	q->stats.tx_bytes += skb->len + extra_bytes;
	q->stats.tx_pkts += 1 + extra_pkts;
	u64_stats_update_end(&q->syncp);

	/* Remember the skb at the request's first descriptor for reclaim. */
	q->info[idx].skb = skb;

	trace_funeth_tx(q, skb->len, idx, req->dataop.ngather);
	return tx_req_ndesc(req);
}
286 
287 /* Return the number of available descriptors of a Tx queue.
288  * HW assumes head==tail means the ring is empty so we need to keep one
289  * descriptor unused.
290  */
291 static unsigned int fun_txq_avail(const struct funeth_txq *q)
292 {
293 	return q->mask - q->prod_cnt + q->cons_cnt;
294 }
295 
296 /* Stop a queue if it can't handle another worst-case packet. */
297 static void fun_tx_check_stop(struct funeth_txq *q)
298 {
299 	if (likely(fun_txq_avail(q) >= FUNETH_MAX_PKT_DESC))
300 		return;
301 
302 	netif_tx_stop_queue(q->ndq);
303 
304 	/* NAPI reclaim is freeing packets in parallel with us and we may race.
305 	 * We have stopped the queue but check again after synchronizing with
306 	 * reclaim.
307 	 */
308 	smp_mb();
309 	if (likely(fun_txq_avail(q) < FUNETH_MAX_PKT_DESC))
310 		FUN_QSTAT_INC(q, tx_nstops);
311 	else
312 		netif_tx_start_queue(q->ndq);
313 }
314 
315 /* Return true if a queue has enough space to restart. Current condition is
316  * that the queue must be >= 1/4 empty.
317  */
318 static bool fun_txq_may_restart(struct funeth_txq *q)
319 {
320 	return fun_txq_avail(q) >= q->mask / 4;
321 }
322 
323 netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev)
324 {
325 	struct funeth_priv *fp = netdev_priv(netdev);
326 	unsigned int qid = skb_get_queue_mapping(skb);
327 	struct funeth_txq *q = fp->txqs[qid];
328 	unsigned int tls_len = 0;
329 	unsigned int ndesc;
330 
331 	if (IS_ENABLED(CONFIG_TLS_DEVICE) && skb->sk &&
332 	    tls_is_sk_tx_device_offloaded(skb->sk)) {
333 		skb = fun_tls_tx(skb, q, &tls_len);
334 		if (unlikely(!skb))
335 			goto dropped;
336 	}
337 
338 	ndesc = write_pkt_desc(skb, q, tls_len);
339 	if (unlikely(!ndesc)) {
340 		dev_kfree_skb_any(skb);
341 		goto dropped;
342 	}
343 
344 	q->prod_cnt += ndesc;
345 	fun_tx_check_stop(q);
346 
347 	skb_tx_timestamp(skb);
348 
349 	if (__netdev_tx_sent_queue(q->ndq, skb->len, netdev_xmit_more()))
350 		fun_txq_wr_db(q);
351 	else
352 		FUN_QSTAT_INC(q, tx_more);
353 
354 	return NETDEV_TX_OK;
355 
356 dropped:
357 	/* A dropped packet may be the last one in a xmit_more train,
358 	 * ring the doorbell just in case.
359 	 */
360 	if (!netdev_xmit_more())
361 		fun_txq_wr_db(q);
362 	return NETDEV_TX_OK;
363 }
364 
365 /* Return a Tx queue's HW head index written back to host memory. */
366 static u16 txq_hw_head(const struct funeth_txq *q)
367 {
368 	return (u16)be64_to_cpu(*q->hw_wb);
369 }
370 
371 /* Unmap the Tx packet starting at the given descriptor index and
372  * return the number of Tx descriptors it occupied.
373  */
374 static unsigned int unmap_skb(const struct funeth_txq *q, unsigned int idx)
375 {
376 	const struct fun_eth_tx_req *req = fun_tx_desc_addr(q, idx);
377 	unsigned int ngle = req->dataop.ngather;
378 	struct fun_dataop_gl *gle;
379 
380 	if (ngle) {
381 		gle = (struct fun_dataop_gl *)req->dataop.imm;
382 		dma_unmap_single(q->dma_dev, be64_to_cpu(gle->sgl_data),
383 				 be32_to_cpu(gle->sgl_len), DMA_TO_DEVICE);
384 
385 		for (gle++; --ngle && txq_to_end(q, gle); gle++)
386 			dma_unmap_page(q->dma_dev, be64_to_cpu(gle->sgl_data),
387 				       be32_to_cpu(gle->sgl_len),
388 				       DMA_TO_DEVICE);
389 
390 		for (gle = (struct fun_dataop_gl *)q->desc; ngle; ngle--, gle++)
391 			dma_unmap_page(q->dma_dev, be64_to_cpu(gle->sgl_data),
392 				       be32_to_cpu(gle->sgl_len),
393 				       DMA_TO_DEVICE);
394 	}
395 
396 	return tx_req_ndesc(req);
397 }
398 
399 /* Reclaim completed Tx descriptors and free their packets. Restart a stopped
400  * queue if we freed enough descriptors.
401  *
402  * Return true if we exhausted the budget while there is more work to be done.
403  */
404 static bool fun_txq_reclaim(struct funeth_txq *q, int budget)
405 {
406 	unsigned int npkts = 0, nbytes = 0, ndesc = 0;
407 	unsigned int head, limit, reclaim_idx;
408 
409 	/* budget may be 0, e.g., netpoll */
410 	limit = budget ? budget : UINT_MAX;
411 
412 	for (head = txq_hw_head(q), reclaim_idx = q->cons_cnt & q->mask;
413 	     head != reclaim_idx && npkts < limit; head = txq_hw_head(q)) {
414 		/* The HW head is continually updated, ensure we don't read
415 		 * descriptor state before the head tells us to reclaim it.
416 		 * On the enqueue side the doorbell is an implicit write
417 		 * barrier.
418 		 */
419 		rmb();
420 
421 		do {
422 			unsigned int pkt_desc = unmap_skb(q, reclaim_idx);
423 			struct sk_buff *skb = q->info[reclaim_idx].skb;
424 
425 			trace_funeth_tx_free(q, reclaim_idx, pkt_desc, head);
426 
427 			nbytes += skb->len;
428 			napi_consume_skb(skb, budget);
429 			ndesc += pkt_desc;
430 			reclaim_idx = (reclaim_idx + pkt_desc) & q->mask;
431 			npkts++;
432 		} while (reclaim_idx != head && npkts < limit);
433 	}
434 
435 	q->cons_cnt += ndesc;
436 	netdev_tx_completed_queue(q->ndq, npkts, nbytes);
437 	smp_mb(); /* pairs with the one in fun_tx_check_stop() */
438 
439 	if (unlikely(netif_tx_queue_stopped(q->ndq) &&
440 		     fun_txq_may_restart(q))) {
441 		netif_tx_wake_queue(q->ndq);
442 		FUN_QSTAT_INC(q, tx_nrestarts);
443 	}
444 
445 	return reclaim_idx != head;
446 }
447 
448 /* The NAPI handler for Tx queues. */
449 int fun_txq_napi_poll(struct napi_struct *napi, int budget)
450 {
451 	struct fun_irq *irq = container_of(napi, struct fun_irq, napi);
452 	struct funeth_txq *q = irq->txq;
453 	unsigned int db_val;
454 
455 	if (fun_txq_reclaim(q, budget))
456 		return budget;               /* exhausted budget */
457 
458 	napi_complete(napi);                 /* exhausted pending work */
459 	db_val = READ_ONCE(q->irq_db_val) | (q->cons_cnt & q->mask);
460 	writel(db_val, q->db);
461 	return 0;
462 }
463 
464 static void fun_xdp_unmap(const struct funeth_txq *q, unsigned int idx)
465 {
466 	const struct fun_eth_tx_req *req = fun_tx_desc_addr(q, idx);
467 	const struct fun_dataop_gl *gle;
468 
469 	gle = (const struct fun_dataop_gl *)req->dataop.imm;
470 	dma_unmap_single(q->dma_dev, be64_to_cpu(gle->sgl_data),
471 			 be32_to_cpu(gle->sgl_len), DMA_TO_DEVICE);
472 }
473 
474 /* Reclaim up to @budget completed Tx descriptors from a TX XDP queue. */
475 static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget)
476 {
477 	unsigned int npkts = 0, head, reclaim_idx;
478 
479 	for (head = txq_hw_head(q), reclaim_idx = q->cons_cnt & q->mask;
480 	     head != reclaim_idx && npkts < budget; head = txq_hw_head(q)) {
481 		/* The HW head is continually updated, ensure we don't read
482 		 * descriptor state before the head tells us to reclaim it.
483 		 * On the enqueue side the doorbell is an implicit write
484 		 * barrier.
485 		 */
486 		rmb();
487 
488 		do {
489 			fun_xdp_unmap(q, reclaim_idx);
490 			page_frag_free(q->info[reclaim_idx].vaddr);
491 
492 			trace_funeth_tx_free(q, reclaim_idx, 1, head);
493 
494 			reclaim_idx = (reclaim_idx + 1) & q->mask;
495 			npkts++;
496 		} while (reclaim_idx != head && npkts < budget);
497 	}
498 
499 	q->cons_cnt += npkts;
500 	return npkts;
501 }
502 
503 bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len)
504 {
505 	struct fun_eth_tx_req *req;
506 	struct fun_dataop_gl *gle;
507 	unsigned int idx;
508 	dma_addr_t dma;
509 
510 	if (fun_txq_avail(q) < FUN_XDP_CLEAN_THRES)
511 		fun_xdpq_clean(q, FUN_XDP_CLEAN_BATCH);
512 
513 	if (!unlikely(fun_txq_avail(q))) {
514 		FUN_QSTAT_INC(q, tx_xdp_full);
515 		return false;
516 	}
517 
518 	dma = dma_map_single(q->dma_dev, data, len, DMA_TO_DEVICE);
519 	if (unlikely(dma_mapping_error(q->dma_dev, dma))) {
520 		FUN_QSTAT_INC(q, tx_map_err);
521 		return false;
522 	}
523 
524 	idx = q->prod_cnt & q->mask;
525 	req = fun_tx_desc_addr(q, idx);
526 	req->op = FUN_ETH_OP_TX;
527 	req->len8 = (sizeof(*req) + sizeof(*gle)) / 8;
528 	req->flags = 0;
529 	req->suboff8 = offsetof(struct fun_eth_tx_req, dataop);
530 	req->repr_idn = 0;
531 	req->encap_proto = 0;
532 	fun_eth_offload_init(&req->offload, 0, 0, 0, 0, 0, 0, 0, 0);
533 	req->dataop = FUN_DATAOP_HDR_INIT(1, 0, 1, 0, len);
534 
535 	gle = (struct fun_dataop_gl *)req->dataop.imm;
536 	fun_dataop_gl_init(gle, 0, 0, len, dma);
537 
538 	q->info[idx].vaddr = data;
539 
540 	u64_stats_update_begin(&q->syncp);
541 	q->stats.tx_bytes += len;
542 	q->stats.tx_pkts++;
543 	u64_stats_update_end(&q->syncp);
544 
545 	trace_funeth_tx(q, len, idx, 1);
546 	q->prod_cnt++;
547 
548 	return true;
549 }
550 
551 int fun_xdp_xmit_frames(struct net_device *dev, int n,
552 			struct xdp_frame **frames, u32 flags)
553 {
554 	struct funeth_priv *fp = netdev_priv(dev);
555 	struct funeth_txq *q, **xdpqs;
556 	int i, q_idx;
557 
558 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
559 		return -EINVAL;
560 
561 	xdpqs = rcu_dereference_bh(fp->xdpqs);
562 	if (unlikely(!xdpqs))
563 		return -ENETDOWN;
564 
565 	q_idx = smp_processor_id();
566 	if (unlikely(q_idx >= fp->num_xdpqs))
567 		return -ENXIO;
568 
569 	for (q = xdpqs[q_idx], i = 0; i < n; i++) {
570 		const struct xdp_frame *xdpf = frames[i];
571 
572 		if (!fun_xdp_tx(q, xdpf->data, xdpf->len))
573 			break;
574 	}
575 
576 	if (unlikely(flags & XDP_XMIT_FLUSH))
577 		fun_txq_wr_db(q);
578 	return i;
579 }
580 
581 /* Purge a Tx queue of any queued packets. Should be called once HW access
582  * to the packets has been revoked, e.g., after the queue has been disabled.
583  */
584 static void fun_txq_purge(struct funeth_txq *q)
585 {
586 	while (q->cons_cnt != q->prod_cnt) {
587 		unsigned int idx = q->cons_cnt & q->mask;
588 
589 		q->cons_cnt += unmap_skb(q, idx);
590 		dev_kfree_skb_any(q->info[idx].skb);
591 	}
592 	netdev_tx_reset_queue(q->ndq);
593 }
594 
595 static void fun_xdpq_purge(struct funeth_txq *q)
596 {
597 	while (q->cons_cnt != q->prod_cnt) {
598 		unsigned int idx = q->cons_cnt & q->mask;
599 
600 		fun_xdp_unmap(q, idx);
601 		page_frag_free(q->info[idx].vaddr);
602 		q->cons_cnt++;
603 	}
604 }
605 
606 /* Create a Tx queue, allocating all the host resources needed. */
607 static struct funeth_txq *fun_txq_create_sw(struct net_device *dev,
608 					    unsigned int qidx,
609 					    unsigned int ndesc,
610 					    struct fun_irq *irq)
611 {
612 	struct funeth_priv *fp = netdev_priv(dev);
613 	struct funeth_txq *q;
614 	int numa_node;
615 
616 	if (irq)
617 		numa_node = fun_irq_node(irq); /* skb Tx queue */
618 	else
619 		numa_node = cpu_to_node(qidx); /* XDP Tx queue */
620 
621 	q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node);
622 	if (!q)
623 		goto err;
624 
625 	q->dma_dev = &fp->pdev->dev;
626 	q->desc = fun_alloc_ring_mem(q->dma_dev, ndesc, FUNETH_SQE_SIZE,
627 				     sizeof(*q->info), true, numa_node,
628 				     &q->dma_addr, (void **)&q->info,
629 				     &q->hw_wb);
630 	if (!q->desc)
631 		goto free_q;
632 
633 	q->netdev = dev;
634 	q->mask = ndesc - 1;
635 	q->qidx = qidx;
636 	q->numa_node = numa_node;
637 	u64_stats_init(&q->syncp);
638 	q->init_state = FUN_QSTATE_INIT_SW;
639 	return q;
640 
641 free_q:
642 	kfree(q);
643 err:
644 	netdev_err(dev, "Can't allocate memory for %s queue %u\n",
645 		   irq ? "Tx" : "XDP", qidx);
646 	return NULL;
647 }
648 
649 static void fun_txq_free_sw(struct funeth_txq *q)
650 {
651 	struct funeth_priv *fp = netdev_priv(q->netdev);
652 
653 	fun_free_ring_mem(q->dma_dev, q->mask + 1, FUNETH_SQE_SIZE, true,
654 			  q->desc, q->dma_addr, q->info);
655 
656 	fp->tx_packets += q->stats.tx_pkts;
657 	fp->tx_bytes   += q->stats.tx_bytes;
658 	fp->tx_dropped += q->stats.tx_map_err;
659 
660 	kfree(q);
661 }
662 
663 /* Allocate the device portion of a Tx queue. */
664 int fun_txq_create_dev(struct funeth_txq *q, struct fun_irq *irq)
665 {
666 	struct funeth_priv *fp = netdev_priv(q->netdev);
667 	unsigned int irq_idx, ndesc = q->mask + 1;
668 	int err;
669 
670 	q->irq = irq;
671 	*q->hw_wb = 0;
672 	q->prod_cnt = 0;
673 	q->cons_cnt = 0;
674 	irq_idx = irq ? irq->irq_idx : 0;
675 
676 	err = fun_sq_create(fp->fdev,
677 			    FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS |
678 			    FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR, 0,
679 			    FUN_HCI_ID_INVALID, ilog2(FUNETH_SQE_SIZE), ndesc,
680 			    q->dma_addr, fp->tx_coal_count, fp->tx_coal_usec,
681 			    irq_idx, 0, fp->fdev->kern_end_qid, 0,
682 			    &q->hw_qid, &q->db);
683 	if (err)
684 		goto out;
685 
686 	err = fun_create_and_bind_tx(fp, q->hw_qid);
687 	if (err < 0)
688 		goto free_devq;
689 	q->ethid = err;
690 
691 	if (irq) {
692 		irq->txq = q;
693 		q->ndq = netdev_get_tx_queue(q->netdev, q->qidx);
694 		q->irq_db_val = FUN_IRQ_SQ_DB(fp->tx_coal_usec,
695 					      fp->tx_coal_count);
696 		writel(q->irq_db_val, q->db);
697 	}
698 
699 	q->init_state = FUN_QSTATE_INIT_FULL;
700 	netif_info(fp, ifup, q->netdev,
701 		   "%s queue %u, depth %u, HW qid %u, IRQ idx %u, eth id %u, node %d\n",
702 		   irq ? "Tx" : "XDP", q->qidx, ndesc, q->hw_qid, irq_idx,
703 		   q->ethid, q->numa_node);
704 	return 0;
705 
706 free_devq:
707 	fun_destroy_sq(fp->fdev, q->hw_qid);
708 out:
709 	netdev_err(q->netdev,
710 		   "Failed to create %s queue %u on device, error %d\n",
711 		   irq ? "Tx" : "XDP", q->qidx, err);
712 	return err;
713 }
714 
715 static void fun_txq_free_dev(struct funeth_txq *q)
716 {
717 	struct funeth_priv *fp = netdev_priv(q->netdev);
718 
719 	if (q->init_state < FUN_QSTATE_INIT_FULL)
720 		return;
721 
722 	netif_info(fp, ifdown, q->netdev,
723 		   "Freeing %s queue %u (id %u), IRQ %u, ethid %u\n",
724 		   q->irq ? "Tx" : "XDP", q->qidx, q->hw_qid,
725 		   q->irq ? q->irq->irq_idx : 0, q->ethid);
726 
727 	fun_destroy_sq(fp->fdev, q->hw_qid);
728 	fun_res_destroy(fp->fdev, FUN_ADMIN_OP_ETH, 0, q->ethid);
729 
730 	if (q->irq) {
731 		q->irq->txq = NULL;
732 		fun_txq_purge(q);
733 	} else {
734 		fun_xdpq_purge(q);
735 	}
736 
737 	q->init_state = FUN_QSTATE_INIT_SW;
738 }
739 
740 /* Create or advance a Tx queue, allocating all the host and device resources
741  * needed to reach the target state.
742  */
743 int funeth_txq_create(struct net_device *dev, unsigned int qidx,
744 		      unsigned int ndesc, struct fun_irq *irq, int state,
745 		      struct funeth_txq **qp)
746 {
747 	struct funeth_txq *q = *qp;
748 	int err;
749 
750 	if (!q)
751 		q = fun_txq_create_sw(dev, qidx, ndesc, irq);
752 	if (!q)
753 		return -ENOMEM;
754 
755 	if (q->init_state >= state)
756 		goto out;
757 
758 	err = fun_txq_create_dev(q, irq);
759 	if (err) {
760 		if (!*qp)
761 			fun_txq_free_sw(q);
762 		return err;
763 	}
764 
765 out:
766 	*qp = q;
767 	return 0;
768 }
769 
770 /* Free Tx queue resources until it reaches the target state.
771  * The queue must be already disconnected from the stack.
772  */
773 struct funeth_txq *funeth_txq_free(struct funeth_txq *q, int state)
774 {
775 	if (state < FUN_QSTATE_INIT_FULL)
776 		fun_txq_free_dev(q);
777 
778 	if (state == FUN_QSTATE_DESTROYED) {
779 		fun_txq_free_sw(q);
780 		q = NULL;
781 	}
782 
783 	return q;
784 }
785